From 1cbbbf39efab05fae67f59e6ed01bb85061c69e2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sun, 28 Apr 2019 22:14:51 +0800 Subject: ieee802154: hwsim: Fix error handle path in hwsim_init_module KASAN report this: BUG: unable to handle kernel paging request at fffffbfff834f001 PGD 237fe8067 P4D 237fe8067 PUD 237e64067 PMD 1c968d067 PTE 0 Oops: 0000 [#1] SMP KASAN PTI CPU: 1 PID: 8871 Comm: syz-executor.0 Tainted: G C 5.0.0+ #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:strcmp+0x31/0xa0 lib/string.c:328 Code: 00 00 00 00 fc ff df 55 53 48 83 ec 08 eb 0a 84 db 48 89 ef 74 5a 4c 89 e6 48 89 f8 48 89 fa 48 8d 6f 01 48 c1 e8 03 83 e2 07 <42> 0f b6 04 28 38 d0 7f 04 84 c0 75 50 48 89 f0 48 89 f2 0f b6 5d RSP: 0018:ffff8881e0c57800 EFLAGS: 00010246 RAX: 1ffffffff834f001 RBX: ffffffffc1a78000 RCX: ffffffff827b9503 RDX: 0000000000000000 RSI: ffffffffc1a40008 RDI: ffffffffc1a78008 RBP: ffffffffc1a78009 R08: fffffbfff6a92195 R09: fffffbfff6a92195 R10: ffff8881e0c578b8 R11: fffffbfff6a92194 R12: ffffffffc1a40008 R13: dffffc0000000000 R14: ffffffffc1a3e470 R15: ffffffffc1a40000 FS: 00007fdcc02ff700(0000) GS:ffff8881f7300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffffbfff834f001 CR3: 00000001b3134003 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: genl_family_find_byname+0x7f/0xf0 net/netlink/genetlink.c:104 genl_register_family+0x1e1/0x1070 net/netlink/genetlink.c:333 ? 0xffffffffc1978000 hwsim_init_module+0x6a/0x1000 [mac802154_hwsim] ? 0xffffffffc1978000 ? 0xffffffffc1978000 ? 
0xffffffffc1978000 do_one_initcall+0xbc/0x47d init/main.c:887 do_init_module+0x1b5/0x547 kernel/module.c:3456 load_module+0x6405/0x8c10 kernel/module.c:3804 __do_sys_finit_module+0x162/0x190 kernel/module.c:3898 do_syscall_64+0x9f/0x450 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fdcc02fec58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000200 RDI: 0000000000000003 RBP: 00007fdcc02fec70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fdcc02ff6bc R13: 00000000004bcefa R14: 00000000006f6fb0 R15: 0000000000000004 Modules linked in: mac802154_hwsim(+) mac802154 ieee802154 speakup(C) rc_proteus_2309 rtc_rk808 streebog_generic rds vboxguest madera_spi madera da9052_wdt mISDN_core ueagle_atm usbatm atm ir_imon_decoder scsi_transport_sas rc_dntv_live_dvb_t panel_samsung_s6d16d0 drm drm_panel_orientation_quirks lib80211 fb_agm1264k_fl(C) gspca_pac7302 gspca_main videobuf2_v4l2 soundwire_intel_init i2c_dln2 dln2 usbcore hid_gaff 88pm8607 nfnetlink axp20x_i2c axp20x uio pata_marvell pmbus_core snd_sonicvibes gameport snd_pcm snd_opl3_lib snd_timer snd_hwdep snd_mpu401_uart snd_rawmidi snd_seq_device snd soundcore rtc_ds1511 rtc_ds1742 vsock dwc_xlgmac rtc_rx8010 libphy twofish_x86_64_3way twofish_x86_64 twofish_common ad5696_i2c ad5686 lp8788_charger cxd2880_spi dvb_core videobuf2_common videodev media videobuf2_vmalloc videobuf2_memops fbtft(C) sysimgblt sysfillrect syscopyarea fb_sys_fops janz_ican3 firewire_net firewire_core crc_itu_t spi_slave_system_control i2c_matroxfb i2c_algo_bit matroxfb_base fb fbdev matroxfb_DAC1064 matroxfb_accel cfbcopyarea cfbimgblt 
cfbfillrect matroxfb_Ti3026 matroxfb_g450 g450_pll matroxfb_misc leds_blinkm ti_dac7311 intel_spi_pci intel_spi spi_nor hid_elan hid async_tx rc_cinergy_1400 rc_core intel_ishtp kxcjk_1013 industrialio_triggered_buffer kfifo_buf can_dev intel_th spi_pxa2xx_platform pata_artop vme_ca91cx42 gb_gbphy(C) greybus(C) industrialio mptbase st_drv cmac ttpci_eeprom via_wdt gpio_xra1403 mtd iptable_security iptable_raw iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun joydev mousedev ppdev kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel aes_x86_64 input_leds crypto_simd cryptd glue_helper ide_pci_generic piix psmouse ide_core serio_raw ata_generic i2c_piix4 pata_acpi parport_pc parport floppy rtc_cmos intel_agp intel_gtt agpgart sch_fq_codel ip_tables x_tables sha1_ssse3 sha1_generic ipv6 [last unloaded: speakup] Dumping ftrace buffer: (ftrace buffer empty) CR2: fffffbfff834f001 ---[ end trace 5aa772c793e0e971 ]--- RIP: 0010:strcmp+0x31/0xa0 lib/string.c:328 Code: 00 00 00 00 fc ff df 55 53 48 83 ec 08 eb 0a 84 db 48 89 ef 74 5a 4c 89 e6 48 89 f8 48 89 fa 48 8d 6f 01 48 c1 e8 03 83 e2 07 <42> 0f b6 04 28 38 d0 7f 04 84 c0 75 50 48 89 f0 48 89 f2 0f b6 5d RSP: 0018:ffff8881e0c57800 EFLAGS: 00010246 RAX: 1ffffffff834f001 RBX: ffffffffc1a78000 RCX: ffffffff827b9503 RDX: 0000000000000000 RSI: ffffffffc1a40008 RDI: ffffffffc1a78008 RBP: ffffffffc1a78009 R08: fffffbfff6a92195 R09: fffffbfff6a92195 R10: ffff8881e0c578b8 R11: fffffbfff6a92194 R12: ffffffffc1a40008 R13: dffffc0000000000 R14: ffffffffc1a3e470 R15: ffffffffc1a40000 FS: 00007fdcc02ff700(0000) GS:ffff8881f7300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffffbfff834f001 CR3: 
00000001b3134003 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 The error handling path in hwsim_init_module misplaces the cleanup calls; swap the two cleanup functions to fix the issue above. Reported-by: Hulk Robot Fixes: f25da51fdc38 ("ieee802154: hwsim: add replacement for fakelb") Signed-off-by: YueHaibing Acked-by: Alexander Aring Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/mac802154_hwsim.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index 3b88846de31b..c2b6ffb5771b 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -912,9 +912,9 @@ static __init int hwsim_init_module(void) return 0; platform_drv: - genl_unregister_family(&hwsim_genl_family); -platform_dev: platform_device_unregister(mac802154hwsim_dev); +platform_dev: + genl_unregister_family(&hwsim_genl_family); return rc; } -- cgit v1.2.1 From de166bbe861738c8bc3e5dad5b03f45d7d6ef914 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sun, 28 Apr 2019 23:48:10 +0800 Subject: ieee802154: hwsim: unregister hw while hwsim_subscribe_all_others fails KASAN report this: kernel BUG at net/mac802154/main.c:130! 
invalid opcode: 0000 [#1] PREEMPT SMP CPU: 0 PID: 19932 Comm: modprobe Not tainted 5.1.0-rc6+ #22 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 RIP: 0010:ieee802154_free_hw+0x2a/0x30 [mac802154] Code: 55 48 8d 57 38 48 89 e5 53 48 89 fb 48 8b 47 38 48 39 c2 75 15 48 8d 7f 48 e8 82 85 16 e1 48 8b 7b 28 e8 f9 ef 83 e2 5b 5d c3 <0f> 0b 0f 1f 40 00 55 48 89 e5 53 48 89 fb 0f b6 86 80 00 00 00 88 RSP: 0018:ffffc90001c7b9f0 EFLAGS: 00010206 RAX: ffff88822df3aa80 RBX: ffff88823143d5c0 RCX: 0000000000000002 RDX: ffff88823143d5f8 RSI: ffff88822b1fabc0 RDI: ffff88823143d5c0 RBP: ffffc90001c7b9f8 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000000 R12: 00000000fffffff4 R13: ffff88822dea4f50 R14: ffff88823143d7c0 R15: 00000000fffffff4 FS: 00007ff52e999540(0000) GS:ffff888237a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fdc06dba768 CR3: 000000023160a000 CR4: 00000000000006f0 Call Trace: hwsim_add_one+0x2dd/0x540 [mac802154_hwsim] hwsim_probe+0x2f/0xb0 [mac802154_hwsim] platform_drv_probe+0x3a/0x90 ? driver_sysfs_add+0x79/0xb0 really_probe+0x1d4/0x2d0 driver_probe_device+0x50/0xf0 device_driver_attach+0x54/0x60 __driver_attach+0x7e/0xd0 ? device_driver_attach+0x60/0x60 bus_for_each_dev+0x68/0xc0 driver_attach+0x19/0x20 bus_add_driver+0x15e/0x200 driver_register+0x5b/0xf0 __platform_driver_register+0x31/0x40 hwsim_init_module+0x74/0x1000 [mac802154_hwsim] ? 0xffffffffa00e9000 do_one_initcall+0x6c/0x3cc ? kmem_cache_alloc_trace+0x248/0x3b0 do_init_module+0x5b/0x1f1 load_module+0x1db1/0x2690 ? 
m_show+0x1d0/0x1d0 __do_sys_finit_module+0xc5/0xd0 __x64_sys_finit_module+0x15/0x20 do_syscall_64+0x6b/0x1d0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7ff52e4a2839 Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 1f f6 2c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffffa7b3c08 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 00005647560a2a00 RCX: 00007ff52e4a2839 RDX: 0000000000000000 RSI: 00005647547f3c2e RDI: 0000000000000003 RBP: 00005647547f3c2e R08: 0000000000000000 R09: 00005647560a2a00 R10: 0000000000000003 R11: 0000000000000246 R12: 0000000000000000 R13: 00005647560a2c10 R14: 0000000000040000 R15: 00005647560a2a00 Modules linked in: mac802154_hwsim(+) mac802154 [last unloaded: mac802154_hwsim] In hwsim_add_one, if hwsim_subscribe_all_others fails, we should call ieee802154_unregister_hw to free resources. Reported-by: Hulk Robot Fixes: f25da51fdc38 ("ieee802154: hwsim: add replacement for fakelb") Signed-off-by: YueHaibing Acked-by: Alexander Aring Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/mac802154_hwsim.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index c2b6ffb5771b..3d9ffd2188f9 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -813,7 +813,7 @@ static int hwsim_add_one(struct genl_info *info, struct device *dev, err = hwsim_subscribe_all_others(phy); if (err < 0) { mutex_unlock(&hwsim_phys_lock); - goto err_reg; + goto err_subscribe; } } list_add_tail(&phy->list, &hwsim_phys); @@ -823,6 +823,8 @@ static int hwsim_add_one(struct genl_info *info, struct device *dev, return idx; +err_subscribe: + ieee802154_unregister_hw(phy->hw); err_reg: kfree(pib); err_pib: -- cgit v1.2.1 From fb59ee37cfe20d10d19568899d1458a58361246c Mon Sep 17 
00:00:00 2001 From: Faiz Abbas Date: Wed, 19 Jun 2019 15:54:54 +0530 Subject: ARM: dts: am57xx: Disable voltage switching for SD card If UHS speed modes are enabled, a compatible SD card switches down to 1.8V during enumeration. If after this a software reboot/crash takes place and on-chip ROM tries to enumerate the SD card, the difference in IO voltages (host @ 3.3V and card @ 1.8V) may end up damaging the card. The fix for this is to have support for power cycling the card in hardware (with a PORz/soft-reset line causing a power cycle of the card). Because the beaglebone X15 (rev A,B and C), am57xx-idks and am57xx-evms don't have this capability, disable voltage switching for these boards. The major effect of this is that the maximum supported speed mode is now high speed(50 MHz) down from SDR104(200 MHz). commit 88a748419b84 ("ARM: dts: am57xx-idk: Remove support for voltage switching for SD card") did this only for idk boards. Do it for all affected boards. Signed-off-by: Faiz Abbas Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am571x-idk.dts | 7 +------ arch/arm/boot/dts/am572x-idk.dts | 7 +------ arch/arm/boot/dts/am574x-idk.dts | 7 +------ arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi | 1 + arch/arm/boot/dts/am57xx-beagle-x15-revb1.dts | 7 +------ arch/arm/boot/dts/am57xx-beagle-x15-revc.dts | 7 +------ 6 files changed, 6 insertions(+), 30 deletions(-) diff --git a/arch/arm/boot/dts/am571x-idk.dts b/arch/arm/boot/dts/am571x-idk.dts index 66116ad3f9f4..0a043908215c 100644 --- a/arch/arm/boot/dts/am571x-idk.dts +++ b/arch/arm/boot/dts/am571x-idk.dts @@ -178,14 +178,9 @@ }; &mmc1 { - pinctrl-names = "default", "hs", "sdr12", "sdr25", "sdr50", "ddr50", "sdr104"; + pinctrl-names = "default", "hs"; pinctrl-0 = <&mmc1_pins_default_no_clk_pu>; pinctrl-1 = <&mmc1_pins_hs>; - pinctrl-2 = <&mmc1_pins_sdr12>; - pinctrl-3 = <&mmc1_pins_sdr25>; - pinctrl-4 = <&mmc1_pins_sdr50>; - pinctrl-5 = <&mmc1_pins_ddr50_rev20 &mmc1_iodelay_ddr50_conf>; - pinctrl-6 = 
<&mmc1_pins_sdr104 &mmc1_iodelay_sdr104_rev20_conf>; }; &mmc2 { diff --git a/arch/arm/boot/dts/am572x-idk.dts b/arch/arm/boot/dts/am572x-idk.dts index 4f835222c266..8663a9416af6 100644 --- a/arch/arm/boot/dts/am572x-idk.dts +++ b/arch/arm/boot/dts/am572x-idk.dts @@ -19,14 +19,9 @@ }; &mmc1 { - pinctrl-names = "default", "hs", "sdr12", "sdr25", "sdr50", "ddr50", "sdr104"; + pinctrl-names = "default", "hs"; pinctrl-0 = <&mmc1_pins_default_no_clk_pu>; pinctrl-1 = <&mmc1_pins_hs>; - pinctrl-2 = <&mmc1_pins_sdr12>; - pinctrl-3 = <&mmc1_pins_sdr25>; - pinctrl-4 = <&mmc1_pins_sdr50>; - pinctrl-5 = <&mmc1_pins_ddr50 &mmc1_iodelay_ddr_rev20_conf>; - pinctrl-6 = <&mmc1_pins_sdr104 &mmc1_iodelay_sdr104_rev20_conf>; }; &mmc2 { diff --git a/arch/arm/boot/dts/am574x-idk.dts b/arch/arm/boot/dts/am574x-idk.dts index dc5141c35610..7935d70874ce 100644 --- a/arch/arm/boot/dts/am574x-idk.dts +++ b/arch/arm/boot/dts/am574x-idk.dts @@ -24,14 +24,9 @@ }; &mmc1 { - pinctrl-names = "default", "hs", "sdr12", "sdr25", "sdr50", "ddr50", "sdr104"; + pinctrl-names = "default", "hs"; pinctrl-0 = <&mmc1_pins_default_no_clk_pu>; pinctrl-1 = <&mmc1_pins_hs>; - pinctrl-2 = <&mmc1_pins_default>; - pinctrl-3 = <&mmc1_pins_hs>; - pinctrl-4 = <&mmc1_pins_sdr50>; - pinctrl-5 = <&mmc1_pins_ddr50 &mmc1_iodelay_ddr_conf>; - pinctrl-6 = <&mmc1_pins_ddr50 &mmc1_iodelay_sdr104_conf>; }; &mmc2 { diff --git a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi index 2341a56ebab9..0cdfd2853ba8 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi +++ b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi @@ -433,6 +433,7 @@ bus-width = <4>; cd-gpios = <&gpio6 27 GPIO_ACTIVE_LOW>; /* gpio 219 */ + no-1-8-v; }; &mmc2 { diff --git a/arch/arm/boot/dts/am57xx-beagle-x15-revb1.dts b/arch/arm/boot/dts/am57xx-beagle-x15-revb1.dts index 5a77b334923d..34c69965821b 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15-revb1.dts +++ b/arch/arm/boot/dts/am57xx-beagle-x15-revb1.dts @@ 
-19,14 +19,9 @@ }; &mmc1 { - pinctrl-names = "default", "hs", "sdr12", "sdr25", "sdr50", "ddr50", "sdr104"; + pinctrl-names = "default", "hs"; pinctrl-0 = <&mmc1_pins_default>; pinctrl-1 = <&mmc1_pins_hs>; - pinctrl-2 = <&mmc1_pins_sdr12>; - pinctrl-3 = <&mmc1_pins_sdr25>; - pinctrl-4 = <&mmc1_pins_sdr50>; - pinctrl-5 = <&mmc1_pins_ddr50 &mmc1_iodelay_ddr_rev11_conf>; - pinctrl-6 = <&mmc1_pins_sdr104 &mmc1_iodelay_sdr104_rev11_conf>; vmmc-supply = <&vdd_3v3>; vqmmc-supply = <&ldo1_reg>; }; diff --git a/arch/arm/boot/dts/am57xx-beagle-x15-revc.dts b/arch/arm/boot/dts/am57xx-beagle-x15-revc.dts index 17c41da3b55f..ccd99160bbdf 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15-revc.dts +++ b/arch/arm/boot/dts/am57xx-beagle-x15-revc.dts @@ -19,14 +19,9 @@ }; &mmc1 { - pinctrl-names = "default", "hs", "sdr12", "sdr25", "sdr50", "ddr50", "sdr104"; + pinctrl-names = "default", "hs"; pinctrl-0 = <&mmc1_pins_default>; pinctrl-1 = <&mmc1_pins_hs>; - pinctrl-2 = <&mmc1_pins_sdr12>; - pinctrl-3 = <&mmc1_pins_sdr25>; - pinctrl-4 = <&mmc1_pins_sdr50>; - pinctrl-5 = <&mmc1_pins_ddr50 &mmc1_iodelay_ddr_rev20_conf>; - pinctrl-6 = <&mmc1_pins_sdr104 &mmc1_iodelay_sdr104_rev20_conf>; vmmc-supply = <&vdd_3v3>; vqmmc-supply = <&ldo1_reg>; }; -- cgit v1.2.1 From fa3a03da549a889fc9dbc0d3c5908eb7882cac8f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 7 Jul 2019 22:15:13 +0200 Subject: batman-adv: Fix netlink dumping of all mcast_flags buckets The bucket variable is only updated outside the loop over the mcast_flags buckets. It will only be updated during a dumping run when the dumping has to be interrupted and a new message has to be started. This could result in repeated or missing entries when the multicast flags are dumped to userspace. 
Fixes: d2d489b7d851 ("batman-adv: Add inconsistent multicast netlink dump detection") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 67d7f83009ae..a3488cfb3d1e 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -2303,7 +2303,7 @@ __batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid, while (bucket_tmp < hash->size) { if (batadv_mcast_flags_dump_bucket(msg, portid, cb, hash, - *bucket, &idx_tmp)) + bucket_tmp, &idx_tmp)) break; bucket_tmp++; -- cgit v1.2.1 From f7af86ccf1882084293b11077deec049fd01da63 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 7 Jul 2019 23:04:57 +0200 Subject: batman-adv: Fix deletion of RTR(4|6) mcast list entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The multicast code uses the lists bat_priv->mcast.want_all_rtr*_list to store all originator nodes which don't have the flag no-RTR4 or no-RTR6 set. When an originator is purged, it has to be removed from these lists. Since all entries without the BATADV_MCAST_WANT_NO_RTR4/6 flag are stored in these lists, they have to be handled like entries which have these flags set to force the update routines to remove them from the lists when purging the originator. Not doing so will leave a pointer to a freed memory region inside the list. 
Trying to operate on these lists will then cause a use-after-free error: BUG: KASAN: use-after-free in batadv_mcast_want_rtr4_update+0x335/0x3a0 [batman_adv] Write of size 8 at addr ffff888007b41a38 by task swapper/0/0 Fixes: 61caf3d109f5 ("batman-adv: mcast: detect, distribute and maintain multicast router presence") Signed-off-by: Sven Eckelmann Acked-by: Linus Lüssing Signed-off-by: Simon Wunderlich --- net/batman-adv/multicast.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index a3488cfb3d1e..1d5bdf3a4b65 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -2420,8 +2420,10 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig) batadv_mcast_want_unsnoop_update(bat_priv, orig, BATADV_NO_FLAGS); batadv_mcast_want_ipv4_update(bat_priv, orig, BATADV_NO_FLAGS); batadv_mcast_want_ipv6_update(bat_priv, orig, BATADV_NO_FLAGS); - batadv_mcast_want_rtr4_update(bat_priv, orig, BATADV_NO_FLAGS); - batadv_mcast_want_rtr6_update(bat_priv, orig, BATADV_NO_FLAGS); + batadv_mcast_want_rtr4_update(bat_priv, orig, + BATADV_MCAST_WANT_NO_RTR4); + batadv_mcast_want_rtr6_update(bat_priv, orig, + BATADV_MCAST_WANT_NO_RTR6); spin_unlock_bh(&orig->mcast_handler_lock); } -- cgit v1.2.1 From e32db73c5aca895a43061cf6621076aa798530e3 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 18 Jul 2019 21:24:55 +0200 Subject: MAINTAINERS: Update my email address Use my kernel.org address instead of the bootlin one. 
Signed-off-by: Maxime Ripard --- .mailmap | 2 ++ MAINTAINERS | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.mailmap b/.mailmap index 0fef932de3db..509d258a9e77 100644 --- a/.mailmap +++ b/.mailmap @@ -157,6 +157,8 @@ Matt Ranostay Matthew Ranostay Matt Ranostay Matt Redfearn +Maxime Ripard +Maxime Ripard Mayuresh Janorkar Michael Buesch Michel Dänzer diff --git a/MAINTAINERS b/MAINTAINERS index 783569e3c4b4..8743615770a9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -683,7 +683,7 @@ S: Maintained F: drivers/crypto/sunxi-ss/ ALLWINNER VPU DRIVER -M: Maxime Ripard +M: Maxime Ripard M: Paul Kocialkowski L: linux-media@vger.kernel.org S: Maintained @@ -1408,7 +1408,7 @@ S: Maintained F: drivers/clk/sunxi/ ARM/Allwinner sunXi SoC support -M: Maxime Ripard +M: Maxime Ripard M: Chen-Yu Tsai L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@ -3573,7 +3573,7 @@ F: Documentation/filesystems/caching/cachefiles.txt F: fs/cachefiles/ CADENCE MIPI-CSI2 BRIDGES -M: Maxime Ripard +M: Maxime Ripard L: linux-media@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/media/cdns,*.txt @@ -5291,7 +5291,7 @@ F: include/linux/vga* DRM DRIVERS AND MISC GPU PATCHES M: Maarten Lankhorst -M: Maxime Ripard +M: Maxime Ripard M: Sean Paul W: https://01.org/linuxgraphics/gfx-docs/maintainer-tools/drm-misc.html S: Maintained @@ -5304,7 +5304,7 @@ F: include/uapi/drm/drm* F: include/linux/vga* DRM DRIVERS FOR ALLWINNER A10 -M: Maxime Ripard +M: Maxime Ripard L: dri-devel@lists.freedesktop.org S: Supported F: drivers/gpu/drm/sun4i/ -- cgit v1.2.1 From afd58b162e48076e3fe66d08a69eefbd6fe71643 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 22 Jul 2019 03:44:52 -0700 Subject: ARM: OMAP2+: Fix missing SYSC_HAS_RESET_STATUS for dra7 epwmss TRM says PWMSS_SYSCONFIG bit for SOFTRESET changes to zero when reset is completed. 
Let's configure it as otherwise we get warnings on boot when we check the data against dts provided data. Eventually the legacy platform data will be just dropped, but let's fix the warning first. Reviewed-by: Suman Anna Tested-by: Keerthy Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod_7xx_data.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c index 4a5b4aee6615..1ec21e9ba1e9 100644 --- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c @@ -379,7 +379,8 @@ static struct omap_hwmod dra7xx_dcan2_hwmod = { static struct omap_hwmod_class_sysconfig dra7xx_epwmss_sysc = { .rev_offs = 0x0, .sysc_offs = 0x4, - .sysc_flags = SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET, + .sysc_flags = SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET | + SYSC_HAS_RESET_STATUS, .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART), .sysc_fields = &omap_hwmod_sysc_type2, }; -- cgit v1.2.1 From 6ee8241d17c68b94a91efabfd6bdfe63bb1b79c1 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 22 Jul 2019 03:44:42 -0700 Subject: bus: ti-sysc: Fix handling of forced idle For some devices we can get the following warning on boot: ti-sysc 48485200.target-module: sysc_disable_module: invalid midlemode Fix this by treating SYSC_IDLE_FORCE like we do for the other bits for idlemodes mask. 
Fixes: d59b60564cbf ("bus: ti-sysc: Add generic enable/disable functions") Cc: Roger Quadros Reviewed-by: Suman Anna Tested-by: Keerthy Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index e6deabd8305d..78fb52e1582e 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -949,7 +949,7 @@ static int sysc_best_idle_mode(u32 idlemodes, u32 *best_mode) *best_mode = SYSC_IDLE_SMART_WKUP; else if (idlemodes & BIT(SYSC_IDLE_SMART)) *best_mode = SYSC_IDLE_SMART; - else if (idlemodes & SYSC_IDLE_FORCE) + else if (idlemodes & BIT(SYSC_IDLE_FORCE)) *best_mode = SYSC_IDLE_FORCE; else return -EINVAL; -- cgit v1.2.1 From e212abd452a4af3174fcd469d46656f83e135a19 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 22 Jul 2019 03:44:52 -0700 Subject: bus: ti-sysc: Fix using configured sysc mask value We have cases where there are no softreset bits like with am335x lcdc. In that case ti,sysc-mask = <0> needs to be handled properly. Tested-by: Keerthy Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 78fb52e1582e..4963c7733554 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -1692,10 +1692,7 @@ static int sysc_init_sysc_mask(struct sysc *ddata) if (error) return 0; - if (val) - ddata->cfg.sysc_val = val & ddata->cap->sysc_mask; - else - ddata->cfg.sysc_val = ddata->cap->sysc_mask; + ddata->cfg.sysc_val = val & ddata->cap->sysc_mask; return 0; } -- cgit v1.2.1 From 2e8647bbe1c8233a20c32fd2648258f2c05c7335 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 22 Jul 2019 03:44:47 -0700 Subject: ARM: dts: Fix flags for gpio7 The ti,no-idle-on-init and ti,no-reset-on-init flags need to be at the interconnect target module level for the modules that have it defined. 
Otherwise we get the following warnings: dts flag should be at module level for ti,no-idle-on-init dts flag should be at module level for ti,no-reset-on-init Reviewed-by: Suman Anna Tested-by: Keerthy Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi | 2 +- arch/arm/boot/dts/dra7-evm.dts | 2 +- arch/arm/boot/dts/dra7-l4.dtsi | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi index d02f5fa61e5f..df00d1756dae 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi +++ b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi @@ -379,7 +379,7 @@ }; }; -&gpio7 { +&gpio7_target { ti,no-reset-on-init; ti,no-idle-on-init; }; diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts index 714e971b912a..de7f85efaa51 100644 --- a/arch/arm/boot/dts/dra7-evm.dts +++ b/arch/arm/boot/dts/dra7-evm.dts @@ -498,7 +498,7 @@ phy-supply = <&ldousb_reg>; }; -&gpio7 { +&gpio7_target { ti,no-reset-on-init; ti,no-idle-on-init; }; diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi index 23faedec08ab..63628e166c0c 100644 --- a/arch/arm/boot/dts/dra7-l4.dtsi +++ b/arch/arm/boot/dts/dra7-l4.dtsi @@ -1261,7 +1261,7 @@ }; }; - target-module@51000 { /* 0x48051000, ap 45 2e.0 */ + gpio7_target: target-module@51000 { /* 0x48051000, ap 45 2e.0 */ compatible = "ti,sysc-omap2", "ti,sysc"; ti,hwmods = "gpio7"; reg = <0x51000 0x4>, -- cgit v1.2.1 From 89bbc6f1eb90809b1538b3a9c54030c558180e3b Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 22 Jul 2019 03:44:42 -0700 Subject: ARM: dts: Fix incorrect dcan register mapping for am3, am4 and dra7 We are currently using a wrong register for dcan revision. Although this is currently only used for detecting the dcan module, let's fix it to avoid confusion. 
Tested-by: Keerthy Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am33xx-l4.dtsi | 4 ++++ arch/arm/boot/dts/am437x-l4.dtsi | 4 ++++ arch/arm/boot/dts/dra7-l4.dtsi | 4 ++-- drivers/bus/ti-sysc.c | 3 ++- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/am33xx-l4.dtsi b/arch/arm/boot/dts/am33xx-l4.dtsi index ced1a19d5f89..4bd22c1edf96 100644 --- a/arch/arm/boot/dts/am33xx-l4.dtsi +++ b/arch/arm/boot/dts/am33xx-l4.dtsi @@ -1758,6 +1758,8 @@ target-module@cc000 { /* 0x481cc000, ap 60 46.0 */ compatible = "ti,sysc-omap4", "ti,sysc"; + reg = <0xcc020 0x4>; + reg-names = "rev"; ti,hwmods = "d_can0"; /* Domains (P, C): per_pwrdm, l4ls_clkdm */ clocks = <&l4ls_clkctrl AM3_L4LS_D_CAN0_CLKCTRL 0>, @@ -1780,6 +1782,8 @@ target-module@d0000 { /* 0x481d0000, ap 62 42.0 */ compatible = "ti,sysc-omap4", "ti,sysc"; + reg = <0xd0020 0x4>; + reg-names = "rev"; ti,hwmods = "d_can1"; /* Domains (P, C): per_pwrdm, l4ls_clkdm */ clocks = <&l4ls_clkctrl AM3_L4LS_D_CAN1_CLKCTRL 0>, diff --git a/arch/arm/boot/dts/am437x-l4.dtsi b/arch/arm/boot/dts/am437x-l4.dtsi index 989cb60b9029..04bee4ff9dcb 100644 --- a/arch/arm/boot/dts/am437x-l4.dtsi +++ b/arch/arm/boot/dts/am437x-l4.dtsi @@ -1574,6 +1574,8 @@ target-module@cc000 { /* 0x481cc000, ap 50 46.0 */ compatible = "ti,sysc-omap4", "ti,sysc"; + reg = <0xcc020 0x4>; + reg-names = "rev"; ti,hwmods = "d_can0"; /* Domains (P, C): per_pwrdm, l4ls_clkdm */ clocks = <&l4ls_clkctrl AM4_L4LS_D_CAN0_CLKCTRL 0>; @@ -1593,6 +1595,8 @@ target-module@d0000 { /* 0x481d0000, ap 52 3a.0 */ compatible = "ti,sysc-omap4", "ti,sysc"; + reg = <0xd0020 0x4>; + reg-names = "rev"; ti,hwmods = "d_can1"; /* Domains (P, C): per_pwrdm, l4ls_clkdm */ clocks = <&l4ls_clkctrl AM4_L4LS_D_CAN1_CLKCTRL 0>; diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi index 63628e166c0c..21e5914fdd62 100644 --- a/arch/arm/boot/dts/dra7-l4.dtsi +++ b/arch/arm/boot/dts/dra7-l4.dtsi @@ -3025,7 +3025,7 @@ target-module@80000 { /* 
0x48480000, ap 31 16.0 */ compatible = "ti,sysc-omap4", "ti,sysc"; - reg = <0x80000 0x4>; + reg = <0x80020 0x4>; reg-names = "rev"; clocks = <&l4per2_clkctrl DRA7_L4PER2_DCAN2_CLKCTRL 0>; clock-names = "fck"; @@ -4577,7 +4577,7 @@ target-module@c000 { /* 0x4ae3c000, ap 30 04.0 */ compatible = "ti,sysc-omap4", "ti,sysc"; - reg = <0xc000 0x4>; + reg = <0xc020 0x4>; reg-names = "rev"; clocks = <&wkupaon_clkctrl DRA7_WKUPAON_DCAN1_CLKCTRL 0>; clock-names = "fck"; diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 4963c7733554..a319e1a748fe 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -1267,7 +1267,8 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = { SYSC_QUIRK("control", 0, 0, 0x10, -1, 0x40000900, 0xffffffff, 0), SYSC_QUIRK("cpgmac", 0, 0x1200, 0x1208, 0x1204, 0x4edb1902, 0xffff00f0, 0), - SYSC_QUIRK("dcan", 0, 0, -1, -1, 0xffffffff, 0xffffffff, 0), + SYSC_QUIRK("dcan", 0, 0x20, -1, -1, 0xa3170504, 0xffffffff, 0), + SYSC_QUIRK("dcan", 0, 0x20, -1, -1, 0x4edb1902, 0xffffffff, 0), SYSC_QUIRK("dmic", 0, 0, 0x10, -1, 0x50010000, 0xffffffff, 0), SYSC_QUIRK("dwc3", 0, 0, 0x10, -1, 0x500a0200, 0xffffffff, 0), SYSC_QUIRK("epwmss", 0, 0, 0x4, -1, 0x47400001, 0xffffffff, 0), -- cgit v1.2.1 From 5d01ab7bac467edfc530e6ccf953921def935c62 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 26 Jul 2019 14:24:38 -0700 Subject: libbpf: fix erroneous multi-closing of BTF FD Libbpf stores associated BTF FD per each instance of bpf_program. When program is unloaded, that FD is closed. This is wrong, because it leads to a race and possibly the closing of unrelated files if the application simultaneously opens new files while bpf_programs are unloaded. It's also unnecessary, because struct btf "owns" that FD, and btf__free(), called from bpf_object__close() will close it. Thus the fix is to never have per-program BTF FD and fetch it from obj->btf, when necessary. 
Fixes: 2993e0515bb4 ("tools/bpf: add support to read .BTF.ext sections") Reported-by: Andrey Ignatov Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2586b6cb8f34..6718d0b90130 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -182,7 +182,6 @@ struct bpf_program { bpf_program_clear_priv_t clear_priv; enum bpf_attach_type expected_attach_type; - int btf_fd; void *func_info; __u32 func_info_rec_size; __u32 func_info_cnt; @@ -313,7 +312,6 @@ void bpf_program__unload(struct bpf_program *prog) prog->instances.nr = -1; zfree(&prog->instances.fds); - zclose(prog->btf_fd); zfree(&prog->func_info); zfree(&prog->line_info); } @@ -392,7 +390,6 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, prog->instances.fds = NULL; prog->instances.nr = -1; prog->type = BPF_PROG_TYPE_UNSPEC; - prog->btf_fd = -1; return 0; errout: @@ -2288,9 +2285,6 @@ bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj, prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext); } - if (!insn_offset) - prog->btf_fd = btf__fd(obj->btf); - return 0; } @@ -2463,7 +2457,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, char *cp, errmsg[STRERR_BUFSIZE]; int log_buf_size = BPF_LOG_BUF_SIZE; char *log_buf; - int ret; + int btf_fd, ret; if (!insns || !insns_cnt) return -EINVAL; @@ -2478,7 +2472,8 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.license = license; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; - load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0; + btf_fd = bpf_object__btf_fd(prog->obj); + load_attr.prog_btf_fd = btf_fd >= 0 ? 
btf_fd : 0; load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; load_attr.func_info_cnt = prog->func_info_cnt; -- cgit v1.2.1 From 9cdde85804833af77c6afbf7c53f0d959c42eb9f Mon Sep 17 00:00:00 2001 From: Hyungwoo Yang Date: Wed, 29 May 2019 21:03:54 -0700 Subject: platform/chrome: cros_ec_ishtp: fix crash during suspend Kernel crashes during suspend due to wrong conversion in suspend and resume functions. Use the proper helper to get ishtp_cl_device instance. Cc: # 5.2.x: b12bbdc5: HID: intel-ish-hid: fix wrong driver_data usage Signed-off-by: Hyungwoo Yang Signed-off-by: Enric Balletbo i Serra --- drivers/platform/chrome/cros_ec_ishtp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/chrome/cros_ec_ishtp.c b/drivers/platform/chrome/cros_ec_ishtp.c index e504d255d5ce..430731cdf827 100644 --- a/drivers/platform/chrome/cros_ec_ishtp.c +++ b/drivers/platform/chrome/cros_ec_ishtp.c @@ -707,7 +707,7 @@ static int cros_ec_ishtp_reset(struct ishtp_cl_device *cl_device) */ static int __maybe_unused cros_ec_ishtp_suspend(struct device *device) { - struct ishtp_cl_device *cl_device = dev_get_drvdata(device); + struct ishtp_cl_device *cl_device = ishtp_dev_to_cl_device(device); struct ishtp_cl *cros_ish_cl = ishtp_get_drvdata(cl_device); struct ishtp_cl_data *client_data = ishtp_get_client_data(cros_ish_cl); @@ -722,7 +722,7 @@ static int __maybe_unused cros_ec_ishtp_suspend(struct device *device) */ static int __maybe_unused cros_ec_ishtp_resume(struct device *device) { - struct ishtp_cl_device *cl_device = dev_get_drvdata(device); + struct ishtp_cl_device *cl_device = ishtp_dev_to_cl_device(device); struct ishtp_cl *cros_ish_cl = ishtp_get_drvdata(cl_device); struct ishtp_cl_data *client_data = ishtp_get_client_data(cros_ish_cl); -- cgit v1.2.1 From 38f054d549a869f22a02224cd276a27bf14b6171 Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Tue, 23 Jul 2019 15:26:28 +0200 Subject: modules: always 
page-align module section allocations Some arches (e.g., arm64, x86) have moved towards non-executable module_alloc() allocations for security hardening reasons. That means that the module loader will need to set the text section of a module to executable, regardless of whether or not CONFIG_STRICT_MODULE_RWX is set. When CONFIG_STRICT_MODULE_RWX=y, module section allocations are always page-aligned to handle memory rwx permissions. On some arches with CONFIG_STRICT_MODULE_RWX=n however, when setting the module text to executable, the BUG_ON() in frob_text() gets triggered since module section allocations are not page-aligned when CONFIG_STRICT_MODULE_RWX=n. Since the set_memory_* API works with pages, and since we need to call set_memory_x() regardless of whether CONFIG_STRICT_MODULE_RWX is set, we might as well page-align all module section allocations for ease of managing rwx permissions of module sections (text, rodata, etc). Fixes: 2eef1399a866 ("modules: fix BUG when load module with rodata=n") Reported-by: Martin Kaiser Reported-by: Bartosz Golaszewski Tested-by: David Lechner Tested-by: Martin Kaiser Tested-by: Bartosz Golaszewski Signed-off-by: Jessica Yu --- kernel/module.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index 5933395af9a0..cd8df516666d 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -64,14 +64,9 @@ /* * Modules' sections will be aligned on page boundaries - * to ensure complete separation of code and data, but - * only when CONFIG_STRICT_MODULE_RWX=y + * to ensure complete separation of code and data */ -#ifdef CONFIG_STRICT_MODULE_RWX # define debug_align(X) ALIGN(X, PAGE_SIZE) -#else -# define debug_align(X) (X) -#endif /* If this is set, the section belongs in the init part of the module */ #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) -- cgit v1.2.1 From 56fbc24116f458a0ea48f9f37fe770fd791042d9 Mon Sep 17 00:00:00 2001 From: Takshak Chahande Date: Wed, 31 Jul 2019 
15:10:55 -0700 Subject: libbpf : make libbpf_num_possible_cpus function thread safe Having static variable `cpus` in libbpf_num_possible_cpus function without guarding it with mutex makes this function thread-unsafe. If multiple threads accessing this function, in the current form; it leads to incrementing the static variable value `cpus` in the multiple of total available CPUs. Used local stack variable to calculate the number of possible CPUs and then updated the static variable using WRITE_ONCE(). Changes since v1: * added stack variable to calculate cpus * serialized static variable update using WRITE_ONCE() * fixed Fixes tag Fixes: 6446b3155521 ("bpf: add a new API libbpf_num_possible_cpus()") Signed-off-by: Takshak Chahande Acked-by: Andrey Ignatov Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6718d0b90130..2e84fa5b8479 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4995,13 +4995,15 @@ int libbpf_num_possible_cpus(void) static const char *fcpu = "/sys/devices/system/cpu/possible"; int len = 0, n = 0, il = 0, ir = 0; unsigned int start = 0, end = 0; + int tmp_cpus = 0; static int cpus; char buf[128]; int error = 0; int fd = -1; - if (cpus > 0) - return cpus; + tmp_cpus = READ_ONCE(cpus); + if (tmp_cpus > 0) + return tmp_cpus; fd = open(fcpu, O_RDONLY); if (fd < 0) { @@ -5024,7 +5026,7 @@ int libbpf_num_possible_cpus(void) } buf[len] = '\0'; - for (ir = 0, cpus = 0; ir <= len; ir++) { + for (ir = 0, tmp_cpus = 0; ir <= len; ir++) { /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ if (buf[ir] == ',' || buf[ir] == '\0') { buf[ir] = '\0'; @@ -5036,13 +5038,15 @@ int libbpf_num_possible_cpus(void) } else if (n == 1) { end = start; } - cpus += end - start + 1; + tmp_cpus += end - start + 1; il = ir + 1; } } - if (cpus <= 0) { - pr_warning("Invalid 
#CPUs %d from %s\n", cpus, fcpu); + if (tmp_cpus <= 0) { + pr_warning("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu); return -EINVAL; } - return cpus; + + WRITE_ONCE(cpus, tmp_cpus); + return tmp_cpus; } -- cgit v1.2.1 From 3415ec643e7bd644b03026efbe2f2b36cbe9b34b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 1 Aug 2019 00:24:05 -0700 Subject: libbpf: set BTF FD for prog only when there is supported .BTF.ext data 5d01ab7bac46 ("libbpf: fix erroneous multi-closing of BTF FD") introduced backwards-compatibility issue, manifesting itself as -E2BIG error returned on program load due to unknown non-zero btf_fd attribute value for BPF_PROG_LOAD sys_bpf() sub-command. This patch fixes bug by ensuring that we only ever associate BTF FD with program if there is a BTF.ext data that was successfully loaded into kernel, which automatically means kernel supports func_info/line_info and associated BTF FD for progs (checked and ensured also by BTF sanitization code). Fixes: 5d01ab7bac46 ("libbpf: fix erroneous multi-closing of BTF FD") Reported-by: Andrey Ignatov Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2e84fa5b8479..2b57d7ea7836 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2472,7 +2472,11 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.license = license; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; - btf_fd = bpf_object__btf_fd(prog->obj); + /* if .BTF.ext was loaded, kernel supports associated BTF for prog */ + if (prog->obj->btf_ext) + btf_fd = bpf_object__btf_fd(prog->obj); + else + btf_fd = -1; load_attr.prog_btf_fd = btf_fd >= 0 ? 
btf_fd : 0; load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; -- cgit v1.2.1 From 7c2e988f400e83501e0a3568250780609b7c8263 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 30 Jul 2019 18:38:26 -0700 Subject: bpf: fix x64 JIT code generation for jmp to 1st insn Introduction of bounded loops exposed old bug in x64 JIT. JIT maintains the array of offsets to the end of all instructions to compute jmp offsets. addrs[0] - offset of the end of the 1st insn (that includes prologue). addrs[1] - offset of the end of the 2nd insn. JIT didn't keep the offset of the beginning of the 1st insn, since classic BPF didn't have backward jumps and valid extended BPF couldn't have a branch to 1st insn, because it didn't allow loops. With bounded loops it's possible to construct a valid program that jumps backwards to the 1st insn. Fix JIT by computing: addrs[0] - offset of the end of prologue == start of the 1st insn. addrs[1] - offset of the end of 1st insn. v1->v2: - Yonghong noticed a bug in jit linfo. Fix it by passing 'addrs + 1' to bpf_prog_fill_jited_linfo(), since it expects insn_to_jit_off array to be offsets to last byte. 
Reported-by: syzbot+35101610ff3e83119b1b@syzkaller.appspotmail.com Fixes: 2589726d12a1 ("bpf: introduce bounded loops") Fixes: 0a14842f5a3c ("net: filter: Just In Time compiler for x86-64") Signed-off-by: Alexei Starovoitov Acked-by: Song Liu --- arch/x86/net/bpf_jit_comp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index eaaed5bfc4a4..991549a1c5f3 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -390,8 +390,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, emit_prologue(&prog, bpf_prog->aux->stack_depth, bpf_prog_was_classic(bpf_prog)); + addrs[0] = prog - temp; - for (i = 0; i < insn_cnt; i++, insn++) { + for (i = 1; i <= insn_cnt; i++, insn++) { const s32 imm32 = insn->imm; u32 dst_reg = insn->dst_reg; u32 src_reg = insn->src_reg; @@ -1105,7 +1106,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) extra_pass = true; goto skip_init_addrs; } - addrs = kmalloc_array(prog->len, sizeof(*addrs), GFP_KERNEL); + addrs = kmalloc_array(prog->len + 1, sizeof(*addrs), GFP_KERNEL); if (!addrs) { prog = orig_prog; goto out_addrs; @@ -1115,7 +1116,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) * Before first pass, make a rough estimation of addrs[] * each BPF instruction is translated to less than 64 bytes */ - for (proglen = 0, i = 0; i < prog->len; i++) { + for (proglen = 0, i = 0; i <= prog->len; i++) { proglen += 64; addrs[i] = proglen; } @@ -1180,7 +1181,7 @@ out_image: if (!image || !prog->is_func || extra_pass) { if (image) - bpf_prog_fill_jited_linfo(prog, addrs); + bpf_prog_fill_jited_linfo(prog, addrs + 1); out_addrs: kfree(addrs); kfree(jit_data); -- cgit v1.2.1 From f1fc7249dddc0e52d9e805e2e661caa118649509 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 30 Jul 2019 18:38:27 -0700 Subject: selftests/bpf: tests for jmp to 1st insn Add 2 tests that check JIT code generation to jumps 
to 1st insn. 1st test is similar to syzbot reproducer. The backwards branch is never taken at runtime. 2nd test has branch to 1st insn that executes. The test is written as two bpf functions, since it's not possible to construct valid single bpf program that jumps to 1st insn. Signed-off-by: Alexei Starovoitov Acked-by: Song Liu --- tools/testing/selftests/bpf/verifier/loops1.c | 28 +++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/loops1.c b/tools/testing/selftests/bpf/verifier/loops1.c index 5e980a5ab69d..1fc4e61e9f9f 100644 --- a/tools/testing/selftests/bpf/verifier/loops1.c +++ b/tools/testing/selftests/bpf/verifier/loops1.c @@ -159,3 +159,31 @@ .errstr = "loop detected", .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, +{ + "not-taken loop with back jump to 1st insn", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 123), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 4, -2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .retval = 123, +}, +{ + "taken loop with back jump to 1st insn", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, -3), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .retval = 55, +}, -- cgit v1.2.1 From a5580eb394c8a48afc3e64aaad68db0d44662cc6 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Wed, 31 Jul 2019 16:11:51 +0200 Subject: ARM: dts: vf610-bk4: Fix qspi node description Before this change the device tree description of qspi node for second memory on BK4 board was wrong (applicable to old, removed fsl-quadspi.c driver). As a result this memory was not recognized correctly when used with the new spi-fsl-qspi.c driver. 
From the dt-bindings: "Required SPI slave node properties: - reg: There are two buses (A and B) with two chip selects each. This encodes to which bus and CS the flash is connected: <0>: Bus A, CS 0 <1>: Bus A, CS 1 <2>: Bus B, CS 0 <3>: Bus B, CS 1" According to above with new driver the second SPI-NOR memory shall have reg=<2> as it is connected to Bus B, CS 0. Fixes: a67d2c52a82f ("ARM: dts: Add support for Liebherr's BK4 device (vf610 based)") Suggested-by: Fabio Estevam Signed-off-by: Lukasz Majewski Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/vf610-bk4.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/vf610-bk4.dts b/arch/arm/boot/dts/vf610-bk4.dts index 3fa0cbe456db..0f3870d3b099 100644 --- a/arch/arm/boot/dts/vf610-bk4.dts +++ b/arch/arm/boot/dts/vf610-bk4.dts @@ -246,13 +246,13 @@ reg = <0>; }; - n25q128a13_2: flash@1 { + n25q128a13_2: flash@2 { compatible = "n25q128a13", "jedec,spi-nor"; #address-cells = <1>; #size-cells = <1>; spi-max-frequency = <66000000>; spi-rx-bus-width = <2>; - reg = <1>; + reg = <2>; }; }; -- cgit v1.2.1 From 97abfd5d801abb6eac35e2d7d725123950e2153d Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Tue, 16 Jul 2019 23:50:34 +0300 Subject: ARCv2: entry: early return from exception need not clear U & DE bits Exception handlers call FAKE_RET_FROM_EXCPN to - clear AE bit: drop down from exception active to pure kernel mode allowing further exceptions - set IE bit: re-enable interrupts It additionally also clears U bit (user mode) and DE bit (delay slot execution) which is redundant as hardware does that already on any taken exception. Moreover the current software clearing is bogus anyways as the KFLAG instruction being used for purpose can't possibly write those bits anyways. So don't pretend to clear them. 
Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta [vgupta: rewrote changelog] --- arch/arc/include/asm/entry-arcv2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h index f5ae394ebe06..41b16f21beec 100644 --- a/arch/arc/include/asm/entry-arcv2.h +++ b/arch/arc/include/asm/entry-arcv2.h @@ -256,7 +256,7 @@ .macro FAKE_RET_FROM_EXCPN lr r9, [status32] - bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK|STATUS_AE_MASK) + bic r9, r9, STATUS_AE_MASK or r9, r9, STATUS_IE_MASK kflag r9 .endm -- cgit v1.2.1 From da31076f96fc41af41d64e94b9fefe0d21c8ee9c Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Fri, 19 Jul 2019 21:46:00 +0300 Subject: ARC: fix typo in setup_dma_ops log message Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/mm/dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 62c210e7ee4c..70a3fbe79fba 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -101,7 +101,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, if (is_isa_arcv2() && ioc_enable && coherent) dev->dma_coherent = true; - dev_info(dev, "use %sncoherent DMA ops\n", + dev_info(dev, "use %scoherent DMA ops\n", dev->dma_coherent ? "" : "non"); } -- cgit v1.2.1 From 0ca1bbb7f4212aeef83a67a8aed9da1d84567fcc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Jul 2019 14:57:18 +0200 Subject: selftests: netfilter: extend flowtable test script for ipsec 'flow offload' expression should not offload flows that will be subject to ipsec, but it does. This results in a connectivity blackhole for the affected flows -- first packets will go through (offload happens after established state is reached), but all remaining ones bypass ipsec encryption and are thus discarded by the peer. This can be worked around by adding "rt ipsec exists accept" before the 'flow offload' rule matches. 
This test case will fail, support for such flows is added in next patch. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_flowtable.sh | 48 ++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index fe52488a6f72..16571ac1dab4 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -321,4 +321,52 @@ else ip netns exec nsr1 nft list ruleset fi +KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1) +KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1) +SPI1=$RANDOM +SPI2=$RANDOM + +if [ $SPI1 -eq $SPI2 ]; then + SPI2=$((SPI2+1)) +fi + +do_esp() { + local ns=$1 + local me=$2 + local remote=$3 + local lnet=$4 + local rnet=$5 + local spi_out=$6 + local spi_in=$7 + + ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet + ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet + + # to encrypt packets as they go out (includes forwarded packets that need encapsulation) + ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 1 action allow + # to fwd decrypted packets after esp processing: + ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 1 action allow + +} + +do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 + +do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 + +ip netns exec nsr1 nft delete table ip nat + +# restore default routes +ip -net ns2 route del 192.168.10.1 via 10.0.2.1 +ip -net ns2 route add default via 10.0.2.1 +ip -net ns2 route add default via dead:2::1 + +test_tcp_forwarding ns1 ns2 
+if [ $? -eq 0 ] ;then + echo "PASS: ipsec tunnel mode for ns1/ns2" +else + echo "FAIL: ipsec tunnel mode for ns1/ns2" + ip netns exec nsr1 nft list ruleset 1>&2 + ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2 +fi + exit $ret -- cgit v1.2.1 From 589b474a4b7ce409d6821ef17234a995841bd131 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Jul 2019 14:57:19 +0200 Subject: netfilter: nf_flow_table: fix offload for flows that are subject to xfrm This makes the previously added 'encap test' pass. Because it's possible that the xfrm dst entry becomes stale while such a flow is offloaded, we need to call dst_check() -- the notifier that handles this for non-tunneled traffic isn't sufficient, because SA or policies might have changed. If dst becomes stale the flow offload entry will be tagged for teardown and packets will be passed to 'classic' forwarding path. Removing the entry right away is problematic, as this would introduce a race condition with the gc worker. In case flow is long-lived, it could eventually be offloaded again once the gc worker removes the entry from the flow table. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_ip.c | 43 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index cdfc33517e85..d68c801dd614 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -214,6 +214,25 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) return true; } +static int nf_flow_offload_dst_check(struct dst_entry *dst) +{ + if (unlikely(dst_xfrm(dst))) + return dst_check(dst, 0) ? 
0 : -1; + + return 0; +} + +static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, + const struct nf_hook_state *state, + struct dst_entry *dst) +{ + skb_orphan(skb); + skb_dst_set_noref(skb, dst); + skb->tstamp = 0; + dst_output(state->net, state->sk, skb); + return NF_STOLEN; +} + unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -254,6 +273,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff)) return NF_ACCEPT; + if (nf_flow_offload_dst_check(&rt->dst)) { + flow_offload_teardown(flow); + return NF_ACCEPT; + } + if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0) return NF_DROP; @@ -261,6 +285,13 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, iph = ip_hdr(skb); ip_decrease_ttl(iph); + if (unlikely(dst_xfrm(&rt->dst))) { + memset(skb->cb, 0, sizeof(struct inet_skb_parm)); + IPCB(skb)->iif = skb->dev->ifindex; + IPCB(skb)->flags = IPSKB_FORWARDED; + return nf_flow_xmit_xfrm(skb, state, &rt->dst); + } + skb->dev = outdev; nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); skb_dst_set_noref(skb, &rt->dst); @@ -467,6 +498,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, sizeof(*ip6h))) return NF_ACCEPT; + if (nf_flow_offload_dst_check(&rt->dst)) { + flow_offload_teardown(flow); + return NF_ACCEPT; + } + if (skb_try_make_writable(skb, sizeof(*ip6h))) return NF_DROP; @@ -477,6 +513,13 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, ip6h = ipv6_hdr(skb); ip6h->hop_limit--; + if (unlikely(dst_xfrm(&rt->dst))) { + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); + IP6CB(skb)->iif = skb->dev->ifindex; + IP6CB(skb)->flags = IP6SKB_FORWARDED; + return nf_flow_xmit_xfrm(skb, state, &rt->dst); + } + skb->dev = outdev; nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); skb_dst_set_noref(skb, &rt->dst); -- cgit v1.2.1 From ce0eff0d9b4d37702df48a39e3fddb5e39b2c25b Mon Sep 17 
00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 22 Jul 2019 12:31:45 +0300 Subject: ARC: [plat-hsdk]: allow to switch between AXI DMAC port configurations We want to use DW AXI DMAC on HSDK board in our automated verification to test cache & dma kernel code changes. This is perfect candidate as we don't depend on any external peripherals like MMC card / USB storage / etc. To increase test coverage we want to test both options: * DW AXI DMAC is connected through IOC port & dma direct ops used * DW AXI DMAC is connected to DDR port & dma noncoherent ops used Introduce 'arc_hsdk_axi_dmac_coherent' global variable which can be modified by debugger (same way as we patch 'ioc_enable') to switch between these options without recompiling the kernel. Depend on this value we tweak memory bridge configuration and "dma-coherent" DTS property of DW AXI DMAC. Signed-off-by: Eugeniy Paltsev Acked-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/Makefile | 3 ++ arch/arc/plat-hsdk/platform.c | 87 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 78 insertions(+), 12 deletions(-) diff --git a/arch/arc/boot/dts/Makefile b/arch/arc/boot/dts/Makefile index a83c4f5e928b..8483a86c743d 100644 --- a/arch/arc/boot/dts/Makefile +++ b/arch/arc/boot/dts/Makefile @@ -12,3 +12,6 @@ dtb-y := $(builtindtb-y).dtb # for CONFIG_OF_ALL_DTBS test dtstree := $(srctree)/$(src) dtb- := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts)) + +# board-specific dtc flags +DTC_FLAGS_hsdk += --pad 20 diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index 7dd2dd335cf6..0b961a2a10b8 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -6,11 +6,15 @@ */ #include +#include +#include #include #include #include #include +int arc_hsdk_axi_dmac_coherent __section(.data) = 0; + #define ARC_CCM_UNUSED_ADDR 0x60000000 static void __init hsdk_init_per_cpu(unsigned int cpu) @@ -97,6 +101,42 @@ static void __init 
hsdk_enable_gpio_intc_wire(void) iowrite32(GPIO_INT_CONNECTED_MASK, (void __iomem *) GPIO_INTEN); } +static int __init hsdk_tweak_node_coherency(const char *path, bool coherent) +{ + void *fdt = initial_boot_params; + const void *prop; + int node, ret; + bool dt_coh_set; + + node = fdt_path_offset(fdt, path); + if (node < 0) + goto tweak_fail; + + prop = fdt_getprop(fdt, node, "dma-coherent", &ret); + if (!prop && ret != -FDT_ERR_NOTFOUND) + goto tweak_fail; + + dt_coh_set = ret != -FDT_ERR_NOTFOUND; + ret = 0; + + /* need to remove "dma-coherent" property */ + if (dt_coh_set && !coherent) + ret = fdt_delprop(fdt, node, "dma-coherent"); + + /* need to set "dma-coherent" property */ + if (!dt_coh_set && coherent) + ret = fdt_setprop(fdt, node, "dma-coherent", NULL, 0); + + if (ret < 0) + goto tweak_fail; + + return 0; + +tweak_fail: + pr_err("failed to tweak %s to %scoherent\n", path, coherent ? "" : "non"); + return -EFAULT; +} + enum hsdk_axi_masters { M_HS_CORE = 0, M_HS_RTT, @@ -162,6 +202,39 @@ enum hsdk_axi_masters { #define CREG_PAE ((void __iomem *)(CREG_BASE + 0x180)) #define CREG_PAE_UPDT ((void __iomem *)(CREG_BASE + 0x194)) +static void __init hsdk_init_memory_bridge_axi_dmac(void) +{ + bool coherent = !!arc_hsdk_axi_dmac_coherent; + u32 axi_m_slv1, axi_m_oft1; + + /* + * Don't tweak memory bridge configuration if we failed to tweak DTB + * as we will end up in a inconsistent state. 
+ */ + if (hsdk_tweak_node_coherency("/soc/dmac@80000", coherent)) + return; + + if (coherent) { + axi_m_slv1 = 0x77999999; + axi_m_oft1 = 0x76DCBA98; + } else { + axi_m_slv1 = 0x77777777; + axi_m_oft1 = 0x76543210; + } + + writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_0)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_0)); + writel(axi_m_slv1, CREG_AXI_M_SLV1(M_DMAC_0)); + writel(axi_m_oft1, CREG_AXI_M_OFT1(M_DMAC_0)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_0)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_1)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_1)); + writel(axi_m_slv1, CREG_AXI_M_SLV1(M_DMAC_1)); + writel(axi_m_oft1, CREG_AXI_M_OFT1(M_DMAC_1)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_1)); +} + static void __init hsdk_init_memory_bridge(void) { u32 reg; @@ -227,24 +300,14 @@ static void __init hsdk_init_memory_bridge(void) writel(0x76543210, CREG_AXI_M_OFT1(M_GPU)); writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_GPU)); - writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_0)); - writel(0x77777777, CREG_AXI_M_SLV1(M_DMAC_0)); - writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_0)); - writel(0x76543210, CREG_AXI_M_OFT1(M_DMAC_0)); - writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_0)); - - writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_1)); - writel(0x77777777, CREG_AXI_M_SLV1(M_DMAC_1)); - writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_1)); - writel(0x76543210, CREG_AXI_M_OFT1(M_DMAC_1)); - writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_1)); - writel(0x00000000, CREG_AXI_M_SLV0(M_DVFS)); writel(0x60000000, CREG_AXI_M_SLV1(M_DVFS)); writel(0x00000000, CREG_AXI_M_OFT0(M_DVFS)); writel(0x00000000, CREG_AXI_M_OFT1(M_DVFS)); writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DVFS)); + hsdk_init_memory_bridge_axi_dmac(); + /* * PAE remapping for DMA clients does not work due to an RTL bug, so * CREG_PAE register must be programmed to all zeroes, otherwise it -- cgit v1.2.1 From 3d4bacdc207a7b62941700b374e7199cbb184a43 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 25 Jun 2019 14:36:47 +0200 Subject: arm64: 
dts: meson-g12a: add missing dwc2 phy-names The G12A USB2 OTG capable PHY uses an 8bit large UTMI bus, and the OTG controller gets the PHY bus width by probing the associated phy. By default it will use 16bit wide settings if a phy is not specified, in our case we specified the phy, but not the phy-names. The dwc2 bindings specify that if phys is present, phy-names shall be "usb2-phy". Adding phy-names = "usb2-phy" solves the OTG PHY bus configuration. Fixes: 9baf7d6be730 ("arm64: dts: meson: g12a: Add G12A USB nodes") Signed-off-by: Neil Armstrong Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-g12a.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12a.dtsi index f8d43e3dcf20..1785552d450c 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12a.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12a.dtsi @@ -2386,6 +2386,7 @@ clocks = <&clkc CLKID_USB1_DDR_BRIDGE>; clock-names = "ddr"; phys = <&usb2_phy1>; + phy-names = "usb2-phy"; dr_mode = "peripheral"; g-rx-fifo-size = <192>; g-np-tx-fifo-size = <128>; -- cgit v1.2.1 From 54f374d1fd302fe6ca21220174c1dcb294049311 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 1 Jul 2019 13:57:24 +0200 Subject: arm64: dts: meson-g12a-sei510: enable IR controller Enable the IR receiver controller on the SEI510 board. 
Signed-off-by: Neil Armstrong Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts index c7a87368850b..12aa7eaeaf68 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts +++ b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts @@ -339,6 +339,12 @@ pinctrl-names = "default"; }; +&ir { + status = "okay"; + pinctrl-0 = <&remote_input_ao_pins>; + pinctrl-names = "default"; +}; + &pwm_ef { status = "okay"; pinctrl-0 = <&pwm_e_pins>; -- cgit v1.2.1 From dc7f2cb218b5ef65ab3d455a0e62d27e44075203 Mon Sep 17 00:00:00 2001 From: Xavier Ruppen Date: Fri, 19 Jul 2019 21:29:54 +0200 Subject: arm64: dts: amlogic: odroid-n2: keep SD card regulator always on When powering off the Odroid N2, the tflash_vdd regulator is automatically turned off by the kernel. This is a problem when issuing the "reboot" command while using an SD card. The boot ROM does not power this regulator back on, blocking the reboot process at the boot ROM stage, preventing the SD card from being detected. Adding the "regulator-always-on" property fixes the problem. 
Signed-off-by: Xavier Ruppen Suggested-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Reviewed-by: Martin Blumenstingl Fixes: c35f6dc5c377 ("arm64: dts: meson: Add minimal support for Odroid-N2") [khilman: minor subject change: s/meson/amlogic/] Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dts b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dts index 81780ffcc7f0..4e916e1f71f7 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dts @@ -53,6 +53,7 @@ gpio = <&gpio_ao GPIOAO_8 GPIO_ACTIVE_HIGH>; enable-active-high; + regulator-always-on; }; tf_io: gpio-regulator-tf_io { -- cgit v1.2.1 From e86d94fdda8e11a2acbe0a910e82f7519f6088b7 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 5 Aug 2019 14:32:32 -0500 Subject: ARC: unwind: Mark expected switch fall-throughs Mark switch cases where we are expecting to fall through. This patch fixes the following warnings (Building: haps_hs_defconfig arc): arch/arc/kernel/unwind.c:827:20: warning: this statement may fall through [-Wimplicit-fallthrough=] arch/arc/kernel/unwind.c:836:20: warning: this statement may fall through [-Wimplicit-fallthrough=] Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. 
Silva Signed-off-by: Vineet Gupta --- arch/arc/kernel/unwind.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index c2663fce7f6c..445e4d702f43 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -826,7 +826,7 @@ static int processCFI(const u8 *start, const u8 *end, unsigned long targetLoc, case DW_CFA_def_cfa: state->cfa.reg = get_uleb128(&ptr.p8, end); unw_debug("cfa_def_cfa: r%lu ", state->cfa.reg); - /*nobreak*/ + /* fall through */ case DW_CFA_def_cfa_offset: state->cfa.offs = get_uleb128(&ptr.p8, end); unw_debug("cfa_def_cfa_offset: 0x%lx ", @@ -834,7 +834,7 @@ static int processCFI(const u8 *start, const u8 *end, unsigned long targetLoc, break; case DW_CFA_def_cfa_sf: state->cfa.reg = get_uleb128(&ptr.p8, end); - /*nobreak */ + /* fall through */ case DW_CFA_def_cfa_offset_sf: state->cfa.offs = get_sleb128(&ptr.p8, end) * state->dataAlign; -- cgit v1.2.1 From 405b93eb764367a670e729da18e54dc42db32620 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Fri, 5 Jul 2019 17:59:28 +0300 Subject: net/mlx5e: Use flow keys dissector to parse packets for ARFS The current ARFS code relies on certain fields to be set in the SKB (e.g. transport_header) and extracts IP addresses and ports by custom code that parses the packet. The necessary SKB fields, however, are not always set at that point, which leads to an out-of-bounds access. Use skb_flow_dissect_flow_keys() to get the necessary information reliably, fix the out-of-bounds access and reuse the code. 
Fixes: 18c908e477dc ("net/mlx5e: Add accelerated RFS support") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 97 ++++++++--------------- 1 file changed, 34 insertions(+), 63 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 8657e0f26995..2c75b2752f58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -437,12 +437,6 @@ arfs_hash_bucket(struct arfs_table *arfs_t, __be16 src_port, return &arfs_t->rules_hash[bucket_idx]; } -static u8 arfs_get_ip_proto(const struct sk_buff *skb) -{ - return (skb->protocol == htons(ETH_P_IP)) ? - ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr; -} - static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs, u8 ip_proto, __be16 etype) { @@ -602,31 +596,9 @@ out: arfs_may_expire_flow(priv); } -/* return L4 destination port from ip4/6 packets */ -static __be16 arfs_get_dst_port(const struct sk_buff *skb) -{ - char *transport_header; - - transport_header = skb_transport_header(skb); - if (arfs_get_ip_proto(skb) == IPPROTO_TCP) - return ((struct tcphdr *)transport_header)->dest; - return ((struct udphdr *)transport_header)->dest; -} - -/* return L4 source port from ip4/6 packets */ -static __be16 arfs_get_src_port(const struct sk_buff *skb) -{ - char *transport_header; - - transport_header = skb_transport_header(skb); - if (arfs_get_ip_proto(skb) == IPPROTO_TCP) - return ((struct tcphdr *)transport_header)->source; - return ((struct udphdr *)transport_header)->source; -} - static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv, struct arfs_table *arfs_t, - const struct sk_buff *skb, + const struct flow_keys *fk, u16 rxq, u32 flow_id) { struct arfs_rule *rule; @@ -641,19 +613,19 @@ static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv, 
INIT_WORK(&rule->arfs_work, arfs_handle_work); tuple = &rule->tuple; - tuple->etype = skb->protocol; + tuple->etype = fk->basic.n_proto; + tuple->ip_proto = fk->basic.ip_proto; if (tuple->etype == htons(ETH_P_IP)) { - tuple->src_ipv4 = ip_hdr(skb)->saddr; - tuple->dst_ipv4 = ip_hdr(skb)->daddr; + tuple->src_ipv4 = fk->addrs.v4addrs.src; + tuple->dst_ipv4 = fk->addrs.v4addrs.dst; } else { - memcpy(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr, + memcpy(&tuple->src_ipv6, &fk->addrs.v6addrs.src, sizeof(struct in6_addr)); - memcpy(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr, + memcpy(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst, sizeof(struct in6_addr)); } - tuple->ip_proto = arfs_get_ip_proto(skb); - tuple->src_port = arfs_get_src_port(skb); - tuple->dst_port = arfs_get_dst_port(skb); + tuple->src_port = fk->ports.src; + tuple->dst_port = fk->ports.dst; rule->flow_id = flow_id; rule->filter_id = priv->fs.arfs.last_filter_id++ % RPS_NO_FILTER; @@ -664,37 +636,33 @@ static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv, return rule; } -static bool arfs_cmp_ips(struct arfs_tuple *tuple, - const struct sk_buff *skb) +static bool arfs_cmp(const struct arfs_tuple *tuple, const struct flow_keys *fk) { - if (tuple->etype == htons(ETH_P_IP) && - tuple->src_ipv4 == ip_hdr(skb)->saddr && - tuple->dst_ipv4 == ip_hdr(skb)->daddr) - return true; - if (tuple->etype == htons(ETH_P_IPV6) && - (!memcmp(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr, - sizeof(struct in6_addr))) && - (!memcmp(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr, - sizeof(struct in6_addr)))) - return true; + if (tuple->src_port != fk->ports.src || tuple->dst_port != fk->ports.dst) + return false; + if (tuple->etype != fk->basic.n_proto) + return false; + if (tuple->etype == htons(ETH_P_IP)) + return tuple->src_ipv4 == fk->addrs.v4addrs.src && + tuple->dst_ipv4 == fk->addrs.v4addrs.dst; + if (tuple->etype == htons(ETH_P_IPV6)) + return !memcmp(&tuple->src_ipv6, &fk->addrs.v6addrs.src, + sizeof(struct in6_addr)) && + 
!memcmp(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst, + sizeof(struct in6_addr)); return false; } static struct arfs_rule *arfs_find_rule(struct arfs_table *arfs_t, - const struct sk_buff *skb) + const struct flow_keys *fk) { struct arfs_rule *arfs_rule; struct hlist_head *head; - __be16 src_port = arfs_get_src_port(skb); - __be16 dst_port = arfs_get_dst_port(skb); - head = arfs_hash_bucket(arfs_t, src_port, dst_port); + head = arfs_hash_bucket(arfs_t, fk->ports.src, fk->ports.dst); hlist_for_each_entry(arfs_rule, head, hlist) { - if (arfs_rule->tuple.src_port == src_port && - arfs_rule->tuple.dst_port == dst_port && - arfs_cmp_ips(&arfs_rule->tuple, skb)) { + if (arfs_cmp(&arfs_rule->tuple, fk)) return arfs_rule; - } } return NULL; @@ -707,20 +675,24 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; struct arfs_table *arfs_t; struct arfs_rule *arfs_rule; + struct flow_keys fk; + + if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) + return -EPROTONOSUPPORT; - if (skb->protocol != htons(ETH_P_IP) && - skb->protocol != htons(ETH_P_IPV6)) + if (fk.basic.n_proto != htons(ETH_P_IP) && + fk.basic.n_proto != htons(ETH_P_IPV6)) return -EPROTONOSUPPORT; if (skb->encapsulation) return -EPROTONOSUPPORT; - arfs_t = arfs_get_table(arfs, arfs_get_ip_proto(skb), skb->protocol); + arfs_t = arfs_get_table(arfs, fk.basic.ip_proto, fk.basic.n_proto); if (!arfs_t) return -EPROTONOSUPPORT; spin_lock_bh(&arfs->arfs_lock); - arfs_rule = arfs_find_rule(arfs_t, skb); + arfs_rule = arfs_find_rule(arfs_t, &fk); if (arfs_rule) { if (arfs_rule->rxq == rxq_index) { spin_unlock_bh(&arfs->arfs_lock); @@ -728,8 +700,7 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, } arfs_rule->rxq = rxq_index; } else { - arfs_rule = arfs_alloc_rule(priv, arfs_t, skb, - rxq_index, flow_id); + arfs_rule = arfs_alloc_rule(priv, arfs_t, &fk, rxq_index, flow_id); if (!arfs_rule) { spin_unlock_bh(&arfs->arfs_lock); return 
-ENOMEM; -- cgit v1.2.1 From 93b3586e070b14704dd7bff81fbcefd64663f3c2 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Wed, 17 Jul 2019 14:04:31 -0500 Subject: net/mlx5: Support inner header match criteria for non decap flow action We have an issue that OVS application creates an offloaded drop rule that drops VXLAN traffic with both inner and outer header match criteria. mlx5_core driver detects correctly the inner and outer header match criteria but does not enable the inner header match criteria due to an incorrect assumption in mlx5_eswitch_add_offloaded_rule that only decap rule needs inner header criteria. Solution: Remove mlx5_esw_flow_attr's match_level and tunnel_match_level and add two new members: inner_match_level and outer_match_level. inner/outer_match_level is set to NONE if the inner/outer match criteria is not specified in the tc rule creation request. The decap assumption is removed and the code just needs to check for inner/outer_match_level to enable the corresponding bit in firmware's match_criteria_enable value. 
Fixes: 6363651d6dd7 ("net/mlx5e: Properly set steering match levels for offloaded TC decap rules") Signed-off-by: Huy Nguyen Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 31 ++++++++++++++-------- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 4 +-- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 12 +++------ 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 7ecfc53cf5f6..deeb65da99f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1480,7 +1480,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, struct net_device *filter_dev, - u8 *match_level, u8 *tunnel_match_level) + u8 *inner_match_level, u8 *outer_match_level) { struct netlink_ext_ack *extack = f->common.extack; void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@ -1495,8 +1495,9 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, struct flow_dissector *dissector = rule->match.dissector; u16 addr_type = 0; u8 ip_proto = 0; + u8 *match_level; - *match_level = MLX5_MATCH_NONE; + match_level = outer_match_level; if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_META) | @@ -1524,12 +1525,14 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, } if (mlx5e_get_tc_tun(filter_dev)) { - if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) + if (parse_tunnel_attr(priv, spec, f, filter_dev, + outer_match_level)) return -EOPNOTSUPP; - /* In decap flow, header pointers should point to the inner + /* At this point, header pointers should point to the inner * headers, outer header were already set by parse_tunnel_attr */ + match_level = inner_match_level; headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP, spec); headers_v = 
get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP, @@ -1831,35 +1834,41 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct flow_cls_offload *f, struct net_device *filter_dev) { + u8 inner_match_level, outer_match_level, non_tunnel_match_level; struct netlink_ext_ack *extack = f->common.extack; struct mlx5_core_dev *dev = priv->mdev; struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; - u8 match_level, tunnel_match_level = MLX5_MATCH_NONE; struct mlx5_eswitch_rep *rep; int err; - err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level, &tunnel_match_level); + inner_match_level = MLX5_MATCH_NONE; + outer_match_level = MLX5_MATCH_NONE; + + err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level, + &outer_match_level); + non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ? + outer_match_level : inner_match_level; if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { rep = rpriv->rep; if (rep->vport != MLX5_VPORT_UPLINK && (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && - esw->offloads.inline_mode < match_level)) { + esw->offloads.inline_mode < non_tunnel_match_level)) { NL_SET_ERR_MSG_MOD(extack, "Flow is not offloaded due to min inline setting"); netdev_warn(priv->netdev, "Flow is not offloaded due to min inline setting, required %d actual %d\n", - match_level, esw->offloads.inline_mode); + non_tunnel_match_level, esw->offloads.inline_mode); return -EOPNOTSUPP; } } if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { - flow->esw_attr->match_level = match_level; - flow->esw_attr->tunnel_match_level = tunnel_match_level; + flow->esw_attr->inner_match_level = inner_match_level; + flow->esw_attr->outer_match_level = outer_match_level; } else { - flow->nic_attr->match_level = match_level; + flow->nic_attr->match_level = non_tunnel_match_level; } return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 
a38e8a3c7c9a..04685dbb280c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -377,8 +377,8 @@ struct mlx5_esw_flow_attr { struct mlx5_termtbl_handle *termtbl; } dests[MLX5_MAX_FLOW_FWD_VPORTS]; u32 mod_hdr_id; - u8 match_level; - u8 tunnel_match_level; + u8 inner_match_level; + u8 outer_match_level; struct mlx5_fc *counter; u32 chain; u16 prio; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 089ae4d48a82..0323fd078271 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -207,14 +207,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, mlx5_eswitch_set_rule_source_port(esw, spec, attr); - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { - if (attr->tunnel_match_level != MLX5_MATCH_NONE) - spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; - if (attr->match_level != MLX5_MATCH_NONE) - spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; - } else if (attr->match_level != MLX5_MATCH_NONE) { + if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; - } + if (attr->inner_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_id = attr->mod_hdr_id; @@ -290,7 +286,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, mlx5_eswitch_set_rule_source_port(esw, spec, attr); spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; - if (attr->match_level != MLX5_MATCH_NONE) + if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); -- cgit v1.2.1 From 466df6eb4a9e813b3cfc674363316450c57a89c5 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: 
Thu, 1 Aug 2019 11:10:19 -0500 Subject: net/mlx5e: Only support tx/rx pause setting for port owner Only support changing tx/rx pause frame setting if the net device is the vport group manager. Fixes: 3c2d18ef22df ("net/mlx5e: Support ethtool get/set_pauseparam") Signed-off-by: Huy Nguyen Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 03bed714bac3..ee9fa0c2c8b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1338,6 +1338,9 @@ int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, struct mlx5_core_dev *mdev = priv->mdev; int err; + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return -EOPNOTSUPP; + if (pauseparam->autoneg) return -EINVAL; -- cgit v1.2.1 From 5faf5b70c51dd9c9905bf8209e33cbd867486607 Mon Sep 17 00:00:00 2001 From: Mohamad Heib Date: Tue, 23 Apr 2019 21:13:48 +0300 Subject: net/mlx5e: ethtool, Avoid setting speed to 56GBASE when autoneg off Setting speed to 56GBASE is allowed only with auto-negotiation enabled. This patch prevents setting speed to 56GBASE when auto-negotiation is disabled. Fixes: f62b8bb8f2d3 ("net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality") Signed-off-by: Mohamad Heib Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index ee9fa0c2c8b9..e89dba790a2d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1081,6 +1081,14 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, link_modes = autoneg == AUTONEG_ENABLE ? 
ethtool2ptys_adver_func(adver) : mlx5e_port_speed2linkmodes(mdev, speed, !ext); + if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) && + autoneg != AUTONEG_ENABLE) { + netdev_err(priv->netdev, "%s: 56G link speed requires autoneg enabled\n", + __func__); + err = -EINVAL; + goto out; + } + link_modes = link_modes & eproto.cap; if (!link_modes) { netdev_err(priv->netdev, "%s: Not supported link mode(s) requested", -- cgit v1.2.1 From 55c9bd37ef5a0bd79c80c8eb418ce162bbc65590 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 21 Jul 2019 14:13:15 +0300 Subject: net/mlx5: crypto, Fix wrong offset in encryption key command Fix the 128b key offset in key encryption key creation command, per the HW specification. Fixes: 45d3b55dc665 ("net/mlx5: Add crypto library to support create/destroy encryption key") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c index ea9ee88491e5..ea1d4d26ece0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c @@ -27,6 +27,7 @@ int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, case 128: general_obj_key_size = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128; + key_p += sz_bytes; break; case 256: general_obj_key_size = -- cgit v1.2.1 From 26149e3e1f44d27897d0af9ca4bcd723674bad44 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 21 Jul 2019 14:18:42 +0300 Subject: net/mlx5: kTLS, Fix wrong TIS opmod constants Fix the used constants for TLS TIS opmods, per the HW specification. 
Fixes: a12ff35e0fb7 ("net/mlx5: Introduce TLS TX offload hardware bits and structures") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index ce9839c8bc1a..c2f056b5766d 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -446,11 +446,11 @@ enum { }; enum { - MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS = 0x20, + MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS = 0x1, }; enum { - MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS = 0x20, + MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS = 0x1, }; enum { -- cgit v1.2.1 From a9bc3390327317345dd4683b70970c83ab400ea3 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 30 Jul 2019 11:55:25 +0300 Subject: net/mlx5e: kTLS, Fix progress params context WQE layout The TLS progress params context WQE should not include an Eth segment, drop it. In addition, align the tls_progress_params layout with the HW specification document: - fix the tisn field name. - remove the valid bit. 
Fixes: a12ff35e0fb7 ("net/mlx5: Introduce TLS TX offload hardware bits and structures") Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 9 +++++++-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h | 6 ++++-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 4 ++-- include/linux/mlx5/mlx5_ifc.h | 5 ++--- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index ce1be2a84231..f6b64a03cd06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -184,8 +184,13 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; - struct mlx5_wqe_eth_seg eth; - struct mlx5_wqe_data_seg data[0]; + union { + struct { + struct mlx5_wqe_eth_seg eth; + struct mlx5_wqe_data_seg data[0]; + }; + u8 tls_progress_params_ctx[0]; + }; }; struct mlx5e_rx_wqe_ll { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index 407da83474ef..b7298f9ee3d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -11,12 +11,14 @@ #include "accel/tls.h" #define MLX5E_KTLS_STATIC_UMR_WQE_SZ \ - (sizeof(struct mlx5e_umr_wqe) + MLX5_ST_SZ_BYTES(tls_static_params)) + (offsetof(struct mlx5e_umr_wqe, tls_static_params_ctx) + \ + MLX5_ST_SZ_BYTES(tls_static_params)) #define MLX5E_KTLS_STATIC_WQEBBS \ (DIV_ROUND_UP(MLX5E_KTLS_STATIC_UMR_WQE_SZ, MLX5_SEND_WQE_BB)) #define MLX5E_KTLS_PROGRESS_WQE_SZ \ - (sizeof(struct mlx5e_tx_wqe) + MLX5_ST_SZ_BYTES(tls_progress_params)) + (offsetof(struct mlx5e_tx_wqe, tls_progress_params_ctx) + \ + MLX5_ST_SZ_BYTES(tls_progress_params)) #define 
MLX5E_KTLS_PROGRESS_WQEBBS \ (DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_BB)) #define MLX5E_KTLS_MAX_DUMP_WQEBBS 2 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 3766545ce259..9f67bfb559f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -80,7 +80,7 @@ build_static_params(struct mlx5e_umr_wqe *wqe, u16 pc, u32 sqn, static void fill_progress_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx) { - MLX5_SET(tls_progress_params, ctx, pd, priv_tx->tisn); + MLX5_SET(tls_progress_params, ctx, tisn, priv_tx->tisn); MLX5_SET(tls_progress_params, ctx, record_tracker_state, MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START); MLX5_SET(tls_progress_params, ctx, auth_state, @@ -104,7 +104,7 @@ build_progress_params(struct mlx5e_tx_wqe *wqe, u16 pc, u32 sqn, PROGRESS_PARAMS_DS_CNT); cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; - fill_progress_params_ctx(wqe->data, priv_tx); + fill_progress_params_ctx(wqe->tls_progress_params_ctx, priv_tx); } static void tx_fill_wi(struct mlx5e_txqsq *sq, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ec571fd7fcf8..b8b570c30b5e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10054,9 +10054,8 @@ struct mlx5_ifc_tls_static_params_bits { }; struct mlx5_ifc_tls_progress_params_bits { - u8 valid[0x1]; - u8 reserved_at_1[0x7]; - u8 pd[0x18]; + u8 reserved_at_0[0x8]; + u8 tisn[0x18]; u8 next_record_tcp_sn[0x20]; -- cgit v1.2.1 From f1897b3cd1af1dce8d6c06b06f02551c71112003 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 8 Aug 2019 12:26:57 +0300 Subject: net/mlx5e: kTLS, Fix tisn field name Use the proper tisn field name from the union in struct mlx5_wqe_ctrl_seg. 
Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 9f67bfb559f1..cfc9e7d457e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -69,7 +69,7 @@ build_static_params(struct mlx5e_umr_wqe *wqe, u16 pc, u32 sqn, cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) | STATIC_PARAMS_DS_CNT); cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; - cseg->imm = cpu_to_be32(priv_tx->tisn); + cseg->tisn = cpu_to_be32(priv_tx->tisn); ucseg->flags = MLX5_UMR_INLINE; ucseg->bsf_octowords = cpu_to_be16(MLX5_ST_SZ_BYTES(tls_static_params) / 16); @@ -278,7 +278,7 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, struct sk_buff *skb, cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - cseg->imm = cpu_to_be32(tisn); + cseg->tisn = cpu_to_be32(tisn); cseg->fm_ce_se = first ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; eseg->inline_hdr.sz = cpu_to_be16(ihs); @@ -434,7 +434,7 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, priv_tx->expected_seq = seq + datalen; cseg = &(*wqe)->ctrl; - cseg->imm = cpu_to_be32(priv_tx->tisn); + cseg->tisn = cpu_to_be32(priv_tx->tisn); stats->tls_encrypted_packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; stats->tls_encrypted_bytes += datalen; -- cgit v1.2.1 From b86f1abe2c275e6f9abf7e2669ccc318b7ef7bb9 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 30 Jul 2019 13:45:42 +0300 Subject: net/mlx5e: kTLS, Fix tisn field placement Shift the tisn field in the WQE control segment, per the HW specification. 
Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index cfc9e7d457e3..8b93101e1a09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -69,7 +69,7 @@ build_static_params(struct mlx5e_umr_wqe *wqe, u16 pc, u32 sqn, cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) | STATIC_PARAMS_DS_CNT); cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; - cseg->tisn = cpu_to_be32(priv_tx->tisn); + cseg->tisn = cpu_to_be32(priv_tx->tisn << 8); ucseg->flags = MLX5_UMR_INLINE; ucseg->bsf_octowords = cpu_to_be16(MLX5_ST_SZ_BYTES(tls_static_params) / 16); @@ -278,7 +278,7 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, struct sk_buff *skb, cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - cseg->tisn = cpu_to_be32(tisn); + cseg->tisn = cpu_to_be32(tisn << 8); cseg->fm_ce_se = first ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; eseg->inline_hdr.sz = cpu_to_be16(ihs); @@ -434,7 +434,7 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, priv_tx->expected_seq = seq + datalen; cseg = &(*wqe)->ctrl; - cseg->tisn = cpu_to_be32(priv_tx->tisn); + cseg->tisn = cpu_to_be32(priv_tx->tisn << 8); stats->tls_encrypted_packets += skb_is_gso(skb) ? 
skb_shinfo(skb)->gso_segs : 1; stats->tls_encrypted_bytes += datalen; -- cgit v1.2.1 From d9a2fcf53c76a7edb2bcf99e94507935561a83d5 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 7 Aug 2019 15:59:06 +0300 Subject: net/mlx5e: Fix false negative indication on tx reporter CQE recovery Remove wrong error return value when SQ is not in error state. CQE recovery on TX reporter queries the sq state. If the sq is not in error state, the sq is either in ready or reset state. Ready state is a good state which doesn't require recovery and reset state is a temporary state which ends in ready state. With this patch, CQE recovery in this scenario is successful. Fixes: de8650a82071 ("net/mlx5e: Add tx reporter support") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index f3d98748b211..b307234b4e05 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -86,10 +86,8 @@ static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq) return err; } - if (state != MLX5_SQC_STATE_ERR) { - netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn); - return -EINVAL; - } + if (state != MLX5_SQC_STATE_ERR) + return 0; mlx5e_tx_disable_queue(sq->txq); -- cgit v1.2.1 From 276d197e70bcc47153592f4384675b51c7d83aba Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 6 Aug 2019 15:19:19 +0300 Subject: net/mlx5e: Fix error flow of CQE recovery on tx reporter CQE recovery function begins with test and set of recovery bit. Add an error flow which ensures clearing of this bit when leaving the recovery function, to allow further recoveries to take place. This allows removal of clearing recovery bit on sq activate. 
Fixes: de8650a82071 ("net/mlx5e: Add tx reporter support") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 12 ++++++++---- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 - 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index b307234b4e05..b91814ecfbc9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -83,17 +83,17 @@ static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq) if (err) { netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", sq->sqn, err); - return err; + goto out; } if (state != MLX5_SQC_STATE_ERR) - return 0; + goto out; mlx5e_tx_disable_queue(sq->txq); err = mlx5e_wait_for_sq_flush(sq); if (err) - return err; + goto out; /* At this point, no new packets will arrive from the stack as TXQ is * marked with QUEUE_STATE_DRV_XOFF. 
In addition, NAPI cleared all @@ -102,13 +102,17 @@ static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq) err = mlx5e_sq_to_ready(sq, state); if (err) - return err; + goto out; mlx5e_reset_txqsq_cc_pc(sq); sq->stats->recover++; + clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); mlx5e_activate_txqsq(sq); return 0; +out: + clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); + return err; } static int mlx5_tx_health_report(struct devlink_health_reporter *tx_reporter, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6c712c5be4d8..9d5f6e56188f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1321,7 +1321,6 @@ err_free_txqsq: void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) { sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix); - clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); netdev_tx_reset_queue(sq->txq); netif_tx_start_queue(sq->txq); -- cgit v1.2.1 From a4e508cab623951dc4754f346e5673714f3bbade Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 8 Aug 2019 15:55:48 +0300 Subject: net/mlx5e: Remove redundant check in CQE recovery flow of tx reporter Remove check of recovery bit, in the beginning of the CQE recovery function. This test is already performed right before the reporter is invoked, when CQE error is detected. 
Fixes: de8650a82071 ("net/mlx5e: Add tx reporter support") Signed-off-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index b91814ecfbc9..c7f86453c638 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -76,9 +76,6 @@ static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq) u8 state; int err; - if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) - return 0; - err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); if (err) { netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", -- cgit v1.2.1 From bf32e7dbfce87d518c0ca77af890eae9ab8d6ab9 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Thu, 8 Aug 2019 16:49:28 +0200 Subject: clk: samsung: Change signature of exynos5_subcmus_init() function In order to make it easier in subsequent patch to create different subcmu lists for exynos5420 and exynos5800 SoCs the code is rewritten so we pass an array of pointers to the subcmus initialization function. 
Fixes: b06a532bf1fa ("clk: samsung: Add Exynos5 sub-CMU clock driver") Tested-by: Jaafar Ali Signed-off-by: Sylwester Nawrocki Link: https://lkml.kernel.org/r/20190808144929.18685-1-s.nawrocki@samsung.com Reviewed-by: Marek Szyprowski Signed-off-by: Stephen Boyd --- drivers/clk/samsung/clk-exynos5-subcmu.c | 16 ++++----- drivers/clk/samsung/clk-exynos5-subcmu.h | 2 +- drivers/clk/samsung/clk-exynos5250.c | 7 +++- drivers/clk/samsung/clk-exynos5420.c | 60 ++++++++++++++++++-------------- 4 files changed, 49 insertions(+), 36 deletions(-) diff --git a/drivers/clk/samsung/clk-exynos5-subcmu.c b/drivers/clk/samsung/clk-exynos5-subcmu.c index 91db7894125d..65c82d922b05 100644 --- a/drivers/clk/samsung/clk-exynos5-subcmu.c +++ b/drivers/clk/samsung/clk-exynos5-subcmu.c @@ -14,7 +14,7 @@ #include "clk-exynos5-subcmu.h" static struct samsung_clk_provider *ctx; -static const struct exynos5_subcmu_info *cmu; +static const struct exynos5_subcmu_info **cmu; static int nr_cmus; static void exynos5_subcmu_clk_save(void __iomem *base, @@ -56,17 +56,17 @@ static void exynos5_subcmu_defer_gate(struct samsung_clk_provider *ctx, * when OF-core populates all device-tree nodes. 
*/ void exynos5_subcmus_init(struct samsung_clk_provider *_ctx, int _nr_cmus, - const struct exynos5_subcmu_info *_cmu) + const struct exynos5_subcmu_info **_cmu) { ctx = _ctx; cmu = _cmu; nr_cmus = _nr_cmus; for (; _nr_cmus--; _cmu++) { - exynos5_subcmu_defer_gate(ctx, _cmu->gate_clks, - _cmu->nr_gate_clks); - exynos5_subcmu_clk_save(ctx->reg_base, _cmu->suspend_regs, - _cmu->nr_suspend_regs); + exynos5_subcmu_defer_gate(ctx, (*_cmu)->gate_clks, + (*_cmu)->nr_gate_clks); + exynos5_subcmu_clk_save(ctx->reg_base, (*_cmu)->suspend_regs, + (*_cmu)->nr_suspend_regs); } } @@ -163,9 +163,9 @@ static int __init exynos5_clk_probe(struct platform_device *pdev) if (of_property_read_string(np, "label", &name) < 0) continue; for (i = 0; i < nr_cmus; i++) - if (strcmp(cmu[i].pd_name, name) == 0) + if (strcmp(cmu[i]->pd_name, name) == 0) exynos5_clk_register_subcmu(&pdev->dev, - &cmu[i], np); + cmu[i], np); } return 0; } diff --git a/drivers/clk/samsung/clk-exynos5-subcmu.h b/drivers/clk/samsung/clk-exynos5-subcmu.h index 755ee8aaa3de..9ae5356f25aa 100644 --- a/drivers/clk/samsung/clk-exynos5-subcmu.h +++ b/drivers/clk/samsung/clk-exynos5-subcmu.h @@ -21,6 +21,6 @@ struct exynos5_subcmu_info { }; void exynos5_subcmus_init(struct samsung_clk_provider *ctx, int nr_cmus, - const struct exynos5_subcmu_info *cmu); + const struct exynos5_subcmu_info **cmu); #endif diff --git a/drivers/clk/samsung/clk-exynos5250.c b/drivers/clk/samsung/clk-exynos5250.c index f2b896881768..931c70a4da19 100644 --- a/drivers/clk/samsung/clk-exynos5250.c +++ b/drivers/clk/samsung/clk-exynos5250.c @@ -681,6 +681,10 @@ static const struct exynos5_subcmu_info exynos5250_disp_subcmu = { .pd_name = "DISP1", }; +static const struct exynos5_subcmu_info *exynos5250_subcmus[] = { + &exynos5250_disp_subcmu, +}; + static const struct samsung_pll_rate_table vpll_24mhz_tbl[] __initconst = { /* sorted in descending order */ /* PLL_36XX_RATE(rate, m, p, s, k) */ @@ -843,7 +847,8 @@ static void __init 
exynos5250_clk_init(struct device_node *np) samsung_clk_sleep_init(reg_base, exynos5250_clk_regs, ARRAY_SIZE(exynos5250_clk_regs)); - exynos5_subcmus_init(ctx, 1, &exynos5250_disp_subcmu); + exynos5_subcmus_init(ctx, ARRAY_SIZE(exynos5250_subcmus), + exynos5250_subcmus); samsung_clk_of_add_provider(np, ctx); diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c index 01bca5a498b2..fdb17c799aa5 100644 --- a/drivers/clk/samsung/clk-exynos5420.c +++ b/drivers/clk/samsung/clk-exynos5420.c @@ -1281,32 +1281,40 @@ static struct exynos5_subcmu_reg_dump exynos5x_mfc_suspend_regs[] = { { DIV4_RATIO, 0, 0x3 }, /* DIV dout_mfc_blk */ }; -static const struct exynos5_subcmu_info exynos5x_subcmus[] = { - { - .div_clks = exynos5x_disp_div_clks, - .nr_div_clks = ARRAY_SIZE(exynos5x_disp_div_clks), - .gate_clks = exynos5x_disp_gate_clks, - .nr_gate_clks = ARRAY_SIZE(exynos5x_disp_gate_clks), - .suspend_regs = exynos5x_disp_suspend_regs, - .nr_suspend_regs = ARRAY_SIZE(exynos5x_disp_suspend_regs), - .pd_name = "DISP", - }, { - .div_clks = exynos5x_gsc_div_clks, - .nr_div_clks = ARRAY_SIZE(exynos5x_gsc_div_clks), - .gate_clks = exynos5x_gsc_gate_clks, - .nr_gate_clks = ARRAY_SIZE(exynos5x_gsc_gate_clks), - .suspend_regs = exynos5x_gsc_suspend_regs, - .nr_suspend_regs = ARRAY_SIZE(exynos5x_gsc_suspend_regs), - .pd_name = "GSC", - }, { - .div_clks = exynos5x_mfc_div_clks, - .nr_div_clks = ARRAY_SIZE(exynos5x_mfc_div_clks), - .gate_clks = exynos5x_mfc_gate_clks, - .nr_gate_clks = ARRAY_SIZE(exynos5x_mfc_gate_clks), - .suspend_regs = exynos5x_mfc_suspend_regs, - .nr_suspend_regs = ARRAY_SIZE(exynos5x_mfc_suspend_regs), - .pd_name = "MFC", - }, +static const struct exynos5_subcmu_info exynos5x_disp_subcmu = { + .div_clks = exynos5x_disp_div_clks, + .nr_div_clks = ARRAY_SIZE(exynos5x_disp_div_clks), + .gate_clks = exynos5x_disp_gate_clks, + .nr_gate_clks = ARRAY_SIZE(exynos5x_disp_gate_clks), + .suspend_regs = exynos5x_disp_suspend_regs, + 
.nr_suspend_regs = ARRAY_SIZE(exynos5x_disp_suspend_regs), + .pd_name = "DISP", +}; + +static const struct exynos5_subcmu_info exynos5x_gsc_subcmu = { + .div_clks = exynos5x_gsc_div_clks, + .nr_div_clks = ARRAY_SIZE(exynos5x_gsc_div_clks), + .gate_clks = exynos5x_gsc_gate_clks, + .nr_gate_clks = ARRAY_SIZE(exynos5x_gsc_gate_clks), + .suspend_regs = exynos5x_gsc_suspend_regs, + .nr_suspend_regs = ARRAY_SIZE(exynos5x_gsc_suspend_regs), + .pd_name = "GSC", +}; + +static const struct exynos5_subcmu_info exynos5x_mfc_subcmu = { + .div_clks = exynos5x_mfc_div_clks, + .nr_div_clks = ARRAY_SIZE(exynos5x_mfc_div_clks), + .gate_clks = exynos5x_mfc_gate_clks, + .nr_gate_clks = ARRAY_SIZE(exynos5x_mfc_gate_clks), + .suspend_regs = exynos5x_mfc_suspend_regs, + .nr_suspend_regs = ARRAY_SIZE(exynos5x_mfc_suspend_regs), + .pd_name = "MFC", +}; + +static const struct exynos5_subcmu_info *exynos5x_subcmus[] = { + &exynos5x_disp_subcmu, + &exynos5x_gsc_subcmu, + &exynos5x_mfc_subcmu, }; static const struct samsung_pll_rate_table exynos5420_pll2550x_24mhz_tbl[] __initconst = { -- cgit v1.2.1 From b6adeb6bc61c2567b9efd815d61a61b34a2e51a6 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Thu, 8 Aug 2019 16:49:29 +0200 Subject: clk: samsung: exynos5800: Move MAU subsystem clocks to MAU sub-CMU This patch fixes broken sound on Exynos5422/5800 platforms after system/suspend resume cycle in cases where the audio root clock is derived from MAU_EPLL_CLK. In order to preserve state of the USER_MUX_MAU_EPLL_CLK clock mux during system suspend/resume cycle for Exynos5800 we group the MAU block input clocks in "MAU" sub-CMU and add the clock mux control bit to .suspend_regs. This ensures that user configuration of the mux is not lost after the PMU block changes the mux setting to OSC_DIV when switching off the MAU power domain. 
Adding the SRC_TOP9 register to exynos5800_clk_regs[] array is not sufficient as at the time of the syscore_ops suspend call MAU power domain is already turned off and we already save and subsequently restore an incorrect register's value. Fixes: b06a532bf1fa ("clk: samsung: Add Exynos5 sub-CMU clock driver") Reported-by: Jaafar Ali Suggested-by: Marek Szyprowski Tested-by: Jaafar Ali Signed-off-by: Sylwester Nawrocki Link: https://lkml.kernel.org/r/20190808144929.18685-2-s.nawrocki@samsung.com Signed-off-by: Stephen Boyd --- drivers/clk/samsung/clk-exynos5420.c | 54 ++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c index fdb17c799aa5..2d18e1ae25d7 100644 --- a/drivers/clk/samsung/clk-exynos5420.c +++ b/drivers/clk/samsung/clk-exynos5420.c @@ -534,8 +534,6 @@ static const struct samsung_gate_clock exynos5800_gate_clks[] __initconst = { GATE_BUS_TOP, 24, 0, 0), GATE(CLK_ACLK432_SCALER, "aclk432_scaler", "mout_user_aclk432_scaler", GATE_BUS_TOP, 27, CLK_IS_CRITICAL, 0), - GATE(CLK_MAU_EPLL, "mau_epll", "mout_user_mau_epll", - SRC_MASK_TOP7, 20, CLK_SET_RATE_PARENT, 0), }; static const struct samsung_mux_clock exynos5420_mux_clks[] __initconst = { @@ -577,8 +575,13 @@ static const struct samsung_div_clock exynos5420_div_clks[] __initconst = { static const struct samsung_gate_clock exynos5420_gate_clks[] __initconst = { GATE(CLK_SECKEY, "seckey", "aclk66_psgen", GATE_BUS_PERIS1, 1, 0, 0), + /* Maudio Block */ GATE(CLK_MAU_EPLL, "mau_epll", "mout_mau_epll_clk", SRC_MASK_TOP7, 20, CLK_SET_RATE_PARENT, 0), + GATE(CLK_SCLK_MAUDIO0, "sclk_maudio0", "dout_maudio0", + GATE_TOP_SCLK_MAU, 0, CLK_SET_RATE_PARENT, 0), + GATE(CLK_SCLK_MAUPCM0, "sclk_maupcm0", "dout_maupcm0", + GATE_TOP_SCLK_MAU, 1, CLK_SET_RATE_PARENT, 0), }; static const struct samsung_mux_clock exynos5x_mux_clks[] __initconst = { @@ -1017,12 +1020,6 @@ static const struct 
samsung_gate_clock exynos5x_gate_clks[] __initconst = { GATE(CLK_SCLK_DP1, "sclk_dp1", "dout_dp1", GATE_TOP_SCLK_DISP1, 20, CLK_SET_RATE_PARENT, 0), - /* Maudio Block */ - GATE(CLK_SCLK_MAUDIO0, "sclk_maudio0", "dout_maudio0", - GATE_TOP_SCLK_MAU, 0, CLK_SET_RATE_PARENT, 0), - GATE(CLK_SCLK_MAUPCM0, "sclk_maupcm0", "dout_maupcm0", - GATE_TOP_SCLK_MAU, 1, CLK_SET_RATE_PARENT, 0), - /* FSYS Block */ GATE(CLK_TSI, "tsi", "aclk200_fsys", GATE_BUS_FSYS0, 0, 0, 0), GATE(CLK_PDMA0, "pdma0", "aclk200_fsys", GATE_BUS_FSYS0, 1, 0, 0), @@ -1281,6 +1278,20 @@ static struct exynos5_subcmu_reg_dump exynos5x_mfc_suspend_regs[] = { { DIV4_RATIO, 0, 0x3 }, /* DIV dout_mfc_blk */ }; + +static const struct samsung_gate_clock exynos5800_mau_gate_clks[] __initconst = { + GATE(CLK_MAU_EPLL, "mau_epll", "mout_user_mau_epll", + SRC_MASK_TOP7, 20, CLK_SET_RATE_PARENT, 0), + GATE(CLK_SCLK_MAUDIO0, "sclk_maudio0", "dout_maudio0", + GATE_TOP_SCLK_MAU, 0, CLK_SET_RATE_PARENT, 0), + GATE(CLK_SCLK_MAUPCM0, "sclk_maupcm0", "dout_maupcm0", + GATE_TOP_SCLK_MAU, 1, CLK_SET_RATE_PARENT, 0), +}; + +static struct exynos5_subcmu_reg_dump exynos5800_mau_suspend_regs[] = { + { SRC_TOP9, 0, BIT(8) }, /* MUX mout_user_mau_epll */ +}; + static const struct exynos5_subcmu_info exynos5x_disp_subcmu = { .div_clks = exynos5x_disp_div_clks, .nr_div_clks = ARRAY_SIZE(exynos5x_disp_div_clks), @@ -1311,12 +1322,27 @@ static const struct exynos5_subcmu_info exynos5x_mfc_subcmu = { .pd_name = "MFC", }; +static const struct exynos5_subcmu_info exynos5800_mau_subcmu = { + .gate_clks = exynos5800_mau_gate_clks, + .nr_gate_clks = ARRAY_SIZE(exynos5800_mau_gate_clks), + .suspend_regs = exynos5800_mau_suspend_regs, + .nr_suspend_regs = ARRAY_SIZE(exynos5800_mau_suspend_regs), + .pd_name = "MAU", +}; + static const struct exynos5_subcmu_info *exynos5x_subcmus[] = { &exynos5x_disp_subcmu, &exynos5x_gsc_subcmu, &exynos5x_mfc_subcmu, }; +static const struct exynos5_subcmu_info *exynos5800_subcmus[] = { + &exynos5x_disp_subcmu, 
+ &exynos5x_gsc_subcmu, + &exynos5x_mfc_subcmu, + &exynos5800_mau_subcmu, +}; + static const struct samsung_pll_rate_table exynos5420_pll2550x_24mhz_tbl[] __initconst = { PLL_35XX_RATE(24 * MHZ, 2000000000, 250, 3, 0), PLL_35XX_RATE(24 * MHZ, 1900000000, 475, 6, 0), @@ -1547,11 +1573,17 @@ static void __init exynos5x_clk_init(struct device_node *np, samsung_clk_extended_sleep_init(reg_base, exynos5x_clk_regs, ARRAY_SIZE(exynos5x_clk_regs), exynos5420_set_clksrc, ARRAY_SIZE(exynos5420_set_clksrc)); - if (soc == EXYNOS5800) + + if (soc == EXYNOS5800) { samsung_clk_sleep_init(reg_base, exynos5800_clk_regs, ARRAY_SIZE(exynos5800_clk_regs)); - exynos5_subcmus_init(ctx, ARRAY_SIZE(exynos5x_subcmus), - exynos5x_subcmus); + + exynos5_subcmus_init(ctx, ARRAY_SIZE(exynos5800_subcmus), + exynos5800_subcmus); + } else { + exynos5_subcmus_init(ctx, ARRAY_SIZE(exynos5x_subcmus), + exynos5x_subcmus); + } samsung_clk_of_add_provider(np, ctx); } -- cgit v1.2.1 From baf7b79e1ad79a41fafd8ab8597b9a96962d822d Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 8 Aug 2019 14:18:39 +0200 Subject: clk: samsung: exynos542x: Move MSCL subsystem clocks to its sub-CMU M2M scaler clocks require special handling of their parent bus clock during power domain on/off sequences. MSCL clocks were not initially added to the sub-CMU handler, because at that time there was no driver for the M2M scaler device and it was not possible to test it. This patch fixes this issue. Parent clock for M2M scaler devices is now properly preserved during MSC power domain on/off sequence. This gives M2M scaler devices proper performance: fullHD XRGB32 image 1000 rotations test takes 3.17s instead of 45.08s.
Fixes: b06a532bf1fa ("clk: samsung: Add Exynos5 sub-CMU clock driver") Signed-off-by: Marek Szyprowski Link: https://lkml.kernel.org/r/20190808121839.23892-1-m.szyprowski@samsung.com Acked-by: Sylwester Nawrocki Signed-off-by: Stephen Boyd --- drivers/clk/samsung/clk-exynos5420.c | 48 +++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c index 2d18e1ae25d7..7670cc596c74 100644 --- a/drivers/clk/samsung/clk-exynos5420.c +++ b/drivers/clk/samsung/clk-exynos5420.c @@ -893,9 +893,6 @@ static const struct samsung_div_clock exynos5x_div_clks[] __initconst = { /* GSCL Block */ DIV(0, "dout_gscl_blk_333", "aclk333_432_gscl", DIV2_RATIO0, 6, 2), - /* MSCL Block */ - DIV(0, "dout_mscl_blk", "aclk400_mscl", DIV2_RATIO0, 28, 2), - /* PSGEN */ DIV(0, "dout_gen_blk", "mout_user_aclk266", DIV2_RATIO0, 8, 1), DIV(0, "dout_jpg_blk", "aclk166", DIV2_RATIO0, 20, 1), @@ -1159,17 +1156,6 @@ static const struct samsung_gate_clock exynos5x_gate_clks[] __initconst = { GATE(CLK_FIMC_LITE3, "fimc_lite3", "aclk333_432_gscl", GATE_IP_GSCL1, 17, 0, 0), - /* MSCL Block */ - GATE(CLK_MSCL0, "mscl0", "aclk400_mscl", GATE_IP_MSCL, 0, 0, 0), - GATE(CLK_MSCL1, "mscl1", "aclk400_mscl", GATE_IP_MSCL, 1, 0, 0), - GATE(CLK_MSCL2, "mscl2", "aclk400_mscl", GATE_IP_MSCL, 2, 0, 0), - GATE(CLK_SMMU_MSCL0, "smmu_mscl0", "dout_mscl_blk", - GATE_IP_MSCL, 8, 0, 0), - GATE(CLK_SMMU_MSCL1, "smmu_mscl1", "dout_mscl_blk", - GATE_IP_MSCL, 9, 0, 0), - GATE(CLK_SMMU_MSCL2, "smmu_mscl2", "dout_mscl_blk", - GATE_IP_MSCL, 10, 0, 0), - /* ISP */ GATE(CLK_SCLK_UART_ISP, "sclk_uart_isp", "dout_uart_isp", GATE_TOP_SCLK_ISP, 0, CLK_SET_RATE_PARENT, 0), @@ -1278,6 +1264,28 @@ static struct exynos5_subcmu_reg_dump exynos5x_mfc_suspend_regs[] = { { DIV4_RATIO, 0, 0x3 }, /* DIV dout_mfc_blk */ }; +static const struct samsung_gate_clock exynos5x_mscl_gate_clks[] __initconst = { + /* MSCL Block */ + 
GATE(CLK_MSCL0, "mscl0", "aclk400_mscl", GATE_IP_MSCL, 0, 0, 0), + GATE(CLK_MSCL1, "mscl1", "aclk400_mscl", GATE_IP_MSCL, 1, 0, 0), + GATE(CLK_MSCL2, "mscl2", "aclk400_mscl", GATE_IP_MSCL, 2, 0, 0), + GATE(CLK_SMMU_MSCL0, "smmu_mscl0", "dout_mscl_blk", + GATE_IP_MSCL, 8, 0, 0), + GATE(CLK_SMMU_MSCL1, "smmu_mscl1", "dout_mscl_blk", + GATE_IP_MSCL, 9, 0, 0), + GATE(CLK_SMMU_MSCL2, "smmu_mscl2", "dout_mscl_blk", + GATE_IP_MSCL, 10, 0, 0), +}; + +static const struct samsung_div_clock exynos5x_mscl_div_clks[] __initconst = { + DIV(0, "dout_mscl_blk", "aclk400_mscl", DIV2_RATIO0, 28, 2), +}; + +static struct exynos5_subcmu_reg_dump exynos5x_mscl_suspend_regs[] = { + { GATE_IP_MSCL, 0xffffffff, 0xffffffff }, /* MSCL gates */ + { SRC_TOP3, 0, BIT(4) }, /* MUX mout_user_aclk400_mscl */ + { DIV2_RATIO0, 0, 0x30000000 }, /* DIV dout_mscl_blk */ +}; static const struct samsung_gate_clock exynos5800_mau_gate_clks[] __initconst = { GATE(CLK_MAU_EPLL, "mau_epll", "mout_user_mau_epll", @@ -1322,6 +1330,16 @@ static const struct exynos5_subcmu_info exynos5x_mfc_subcmu = { .pd_name = "MFC", }; +static const struct exynos5_subcmu_info exynos5x_mscl_subcmu = { + .div_clks = exynos5x_mscl_div_clks, + .nr_div_clks = ARRAY_SIZE(exynos5x_mscl_div_clks), + .gate_clks = exynos5x_mscl_gate_clks, + .nr_gate_clks = ARRAY_SIZE(exynos5x_mscl_gate_clks), + .suspend_regs = exynos5x_mscl_suspend_regs, + .nr_suspend_regs = ARRAY_SIZE(exynos5x_mscl_suspend_regs), + .pd_name = "MSC", +}; + static const struct exynos5_subcmu_info exynos5800_mau_subcmu = { .gate_clks = exynos5800_mau_gate_clks, .nr_gate_clks = ARRAY_SIZE(exynos5800_mau_gate_clks), @@ -1334,12 +1352,14 @@ static const struct exynos5_subcmu_info *exynos5x_subcmus[] = { &exynos5x_disp_subcmu, &exynos5x_gsc_subcmu, &exynos5x_mfc_subcmu, + &exynos5x_mscl_subcmu, }; static const struct exynos5_subcmu_info *exynos5800_subcmus[] = { &exynos5x_disp_subcmu, &exynos5x_gsc_subcmu, &exynos5x_mfc_subcmu, + &exynos5x_mscl_subcmu, 
&exynos5800_mau_subcmu, }; -- cgit v1.2.1 From 891584f48a9084ba462f10da4c6bb28b6181b543 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 2 Aug 2019 17:15:03 +0200 Subject: inet: frags: re-introduce skb coalescing for local delivery Before commit d4289fcc9b16 ("net: IP6 defrag: use rbtrees for IPv6 defrag"), a netperf UDP_STREAM test[0] using big IPv6 datagrams (thus generating many fragments) and running over an IPsec tunnel, reported more than 6Gbps throughput. After that patch, the same test gets only 9Mbps when receiving on a be2net nic (driver can make a big difference here, for example, ixgbe doesn't seem to be affected). By reusing the IPv4 defragmentation code, IPv6 lost fragment coalescing (IPv4 fragment coalescing was dropped by commit 14fe22e33462 ("Revert "ipv4: use skb coalescing in defragmentation"")). Without fragment coalescing, be2net runs out of Rx ring entries and starts to drop frames (ethtool reports rx_drops_no_frags errors). Since the netperf traffic is only composed of UDP fragments, any lost packet prevents reassembly of the full datagram. Therefore, fragments which have no possibility to ever get reassembled pile up in the reassembly queue, until the memory accounting exceeds the threshold. At that point no fragment is accepted anymore, which effectively discards all netperf traffic. When reassembly timeout expires, some stale fragments are removed from the reassembly queue, so a few packets can be received, reassembled and delivered to the netperf receiver. But the nic still drops frames and soon the reassembly queue gets filled again with stale fragments. These long time frames where no datagram can be received explain why the performance drop is so significant. Re-introducing fragment coalescing is enough to get the initial performances again (6.6Gbps with be2net): driver doesn't drop frames anymore (no more rx_drops_no_frags errors) and the reassembly engine works at full speed.
This patch is quite conservative and only coalesces skbs for local IPv4 and IPv6 delivery (in order to avoid changing skb geometry when forwarding). Coalescing could be extended in the future if need be, as more scenarios would probably benefit from it. [0]: Test configuration Sender: ip xfrm policy flush ip xfrm state flush ip xfrm state add src fc00:1::1 dst fc00:2::1 proto esp spi 0x1000 aead 'rfc4106(gcm(aes))' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b 96 mode transport sel src fc00:1::1 dst fc00:2::1 ip xfrm policy add src fc00:1::1 dst fc00:2::1 dir in tmpl src fc00:1::1 dst fc00:2::1 proto esp mode transport action allow ip xfrm state add src fc00:2::1 dst fc00:1::1 proto esp spi 0x1001 aead 'rfc4106(gcm(aes))' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b 96 mode transport sel src fc00:2::1 dst fc00:1::1 ip xfrm policy add src fc00:2::1 dst fc00:1::1 dir out tmpl src fc00:2::1 dst fc00:1::1 proto esp mode transport action allow netserver -D -L fc00:2::1 Receiver: ip xfrm policy flush ip xfrm state flush ip xfrm state add src fc00:2::1 dst fc00:1::1 proto esp spi 0x1001 aead 'rfc4106(gcm(aes))' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b 96 mode transport sel src fc00:2::1 dst fc00:1::1 ip xfrm policy add src fc00:2::1 dst fc00:1::1 dir in tmpl src fc00:2::1 dst fc00:1::1 proto esp mode transport action allow ip xfrm state add src fc00:1::1 dst fc00:2::1 proto esp spi 0x1000 aead 'rfc4106(gcm(aes))' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b 96 mode transport sel src fc00:1::1 dst fc00:2::1 ip xfrm policy add src fc00:1::1 dst fc00:2::1 dir out tmpl src fc00:1::1 dst fc00:2::1 proto esp mode transport action allow netperf -H fc00:2::1 -f k -P 0 -L fc00:1::1 -l 60 -t UDP_STREAM -I 99,5 -i 5,5 -T5,5 -6 Signed-off-by: Guillaume Nault Acked-by: Florian Westphal Signed-off-by: David S. 
Miller --- include/net/inet_frag.h | 2 +- net/ieee802154/6lowpan/reassembly.c | 2 +- net/ipv4/inet_fragment.c | 39 +++++++++++++++++++++++---------- net/ipv4/ip_fragment.c | 8 ++++++- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/reassembly.c | 2 +- 6 files changed, 39 insertions(+), 16 deletions(-) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 010f26b31c89..bac79e817776 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -171,7 +171,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, struct sk_buff *parent); void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, - void *reasm_data); + void *reasm_data, bool try_coalesce); struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q); #endif diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index e4aba5d485be..bbe9b3b2d395 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -170,7 +170,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *skb, reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); if (!reasm_data) goto out_oom; - inet_frag_reasm_finish(&fq->q, skb, reasm_data); + inet_frag_reasm_finish(&fq->q, skb, reasm_data, false); skb->dev = ldev; skb->tstamp = fq->q.stamp; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index a999451345f9..10d31733297d 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -475,11 +475,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, EXPORT_SYMBOL(inet_frag_reasm_prepare); void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, - void *reasm_data) + void *reasm_data, bool try_coalesce) { struct sk_buff **nextp = (struct sk_buff **)reasm_data; struct rb_node *rbn; struct sk_buff *fp; + int sum_truesize; 
skb_push(head, head->data - skb_network_header(head)); @@ -487,25 +488,41 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, fp = FRAG_CB(head)->next_frag; rbn = rb_next(&head->rbnode); rb_erase(&head->rbnode, &q->rb_fragments); + + sum_truesize = head->truesize; while (rbn || fp) { /* fp points to the next sk_buff in the current run; * rbn points to the next run. */ /* Go through the current run. */ while (fp) { - *nextp = fp; - nextp = &fp->next; - fp->prev = NULL; - memset(&fp->rbnode, 0, sizeof(fp->rbnode)); - fp->sk = NULL; - head->data_len += fp->len; - head->len += fp->len; + struct sk_buff *next_frag = FRAG_CB(fp)->next_frag; + bool stolen; + int delta; + + sum_truesize += fp->truesize; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); - head->truesize += fp->truesize; - fp = FRAG_CB(fp)->next_frag; + + if (try_coalesce && skb_try_coalesce(head, fp, &stolen, + &delta)) { + kfree_skb_partial(fp, stolen); + } else { + fp->prev = NULL; + memset(&fp->rbnode, 0, sizeof(fp->rbnode)); + fp->sk = NULL; + + head->data_len += fp->len; + head->len += fp->len; + head->truesize += fp->truesize; + + *nextp = fp; + nextp = &fp->next; + } + + fp = next_frag; } /* Move to the next run. */ if (rbn) { @@ -516,7 +533,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, rbn = rbnext; } } - sub_frag_mem_limit(q->fqdir, head->truesize); + sub_frag_mem_limit(q->fqdir, sum_truesize); *nextp = NULL; skb_mark_not_on_list(head); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 4385eb9e781f..cfeb8890f94e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -393,6 +393,11 @@ err: return err; } +static bool ip_frag_coalesce_ok(const struct ipq *qp) +{ + return qp->q.key.v4.user == IP_DEFRAG_LOCAL_DELIVER; +} + /* Build a new IP datagram from all its fragments. 
*/ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, struct sk_buff *prev_tail, struct net_device *dev) @@ -421,7 +426,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, if (len > 65535) goto out_oversize; - inet_frag_reasm_finish(&qp->q, skb, reasm_data); + inet_frag_reasm_finish(&qp->q, skb, reasm_data, + ip_frag_coalesce_ok(qp)); skb->dev = dev; IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 0f82c150543b..fed9666a2f7d 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -348,7 +348,7 @@ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, skb_reset_transport_header(skb); - inet_frag_reasm_finish(&fq->q, skb, reasm_data); + inet_frag_reasm_finish(&fq->q, skb, reasm_data, false); skb->ignore_df = 1; skb->dev = dev; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index ca05b16f1bb9..1f5d4d196dcc 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -282,7 +282,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, skb_reset_transport_header(skb); - inet_frag_reasm_finish(&fq->q, skb, reasm_data); + inet_frag_reasm_finish(&fq->q, skb, reasm_data, true); skb->dev = dev; ipv6_hdr(skb)->payload_len = htons(payload_len); -- cgit v1.2.1 From 3a0233ddec554b886298de2428edb5c50a20e694 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Mon, 5 Aug 2019 16:34:34 +0100 Subject: xen/netback: Reset nr_frags before freeing skb At this point nr_frags has been incremented but the frag does not yet have a page assigned so freeing the skb results in a crash. Reset nr_frags before freeing the skb to prevent this. Signed-off-by: Ross Lagerwall Signed-off-by: David S. 
Miller --- drivers/net/xen-netback/netback.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 1d9940d4e8c7..c9262ffeefe4 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -925,6 +925,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS; nskb = xenvif_alloc_skb(0); if (unlikely(nskb == NULL)) { + skb_shinfo(skb)->nr_frags = 0; kfree_skb(skb); xenvif_tx_err(queue, &txreq, extra_count, idx); if (net_ratelimit()) @@ -940,6 +941,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, if (xenvif_set_skb_gso(queue->vif, skb, gso)) { /* Failure in xenvif_set_skb_gso is fatal. */ + skb_shinfo(skb)->nr_frags = 0; kfree_skb(skb); kfree_skb(nskb); break; -- cgit v1.2.1 From 7e7c076e123ae8c4faa1966fc0da64f6e24eb57e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 5 Aug 2019 15:30:02 -0700 Subject: docs: admin-guide: remove references to IPX and token-ring Both IPX and TR have not been supported for a while now. Remove them from the /proc/sys/net documentation. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- Documentation/admin-guide/sysctl/net.rst | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index a7d44e71019d..287b98708a40 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -39,7 +39,6 @@ Table : Subdirectories in /proc/sys/net 802 E802 protocol ax25 AX25 ethernet Ethernet protocol rose X.25 PLP layer ipv4 IP version 4 x25 X.25 protocol - ipx IPX token-ring IBM token ring bridge Bridging decnet DEC net ipv6 IP version 6 tipc TIPC ========= =================== = ========== ================== @@ -401,33 +400,7 @@ interface. 
(network) that the route leads to, the router (may be directly connected), the route flags, and the device the route is using. - -5. IPX ------- - -The IPX protocol has no tunable values in proc/sys/net. - -The IPX protocol does, however, provide proc/net/ipx. This lists each IPX -socket giving the local and remote addresses in Novell format (that is -network:node:port). In accordance with the strange Novell tradition, -everything but the port is in hex. Not_Connected is displayed for sockets that -are not tied to a specific remote address. The Tx and Rx queue sizes indicate -the number of bytes pending for transmission and reception. The state -indicates the state the socket is in and the uid is the owning uid of the -socket. - -The /proc/net/ipx_interface file lists all IPX interfaces. For each interface -it gives the network number, the node number, and indicates if the network is -the primary network. It also indicates which device it is bound to (or -Internal for internal networks) and the Frame Type if appropriate. Linux -supports 802.3, 802.2, 802.2 SNAP and DIX (Blue Book) ethernet framing for -IPX. - -The /proc/net/ipx_route table holds a list of IPX routes. For each route it -gives the destination network, the router node (or Directly) and the network -address of the router (or Connected) for internal networks. - -6. TIPC +5. TIPC ------- tipc_rmem -- cgit v1.2.1 From fe90689fed119cb55ff04e6a1df0817f0a3e9d32 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 5 Aug 2019 15:30:03 -0700 Subject: net: docs: replace IPX in tuntap documentation IPX is no longer supported, but the example in the documentation might be useful. Replace it with IPv6. Signed-off-by: Stephen Hemminger Signed-off-by: David S.
Miller --- Documentation/networking/tuntap.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/tuntap.txt b/Documentation/networking/tuntap.txt index 949d5dcdd9a3..0104830d5075 100644 --- a/Documentation/networking/tuntap.txt +++ b/Documentation/networking/tuntap.txt @@ -204,8 +204,8 @@ Ethernet device, which instead of receiving packets from a physical media, receives them from user space program and instead of sending packets via physical media sends them to the user space program. -Let's say that you configured IPX on the tap0, then whenever -the kernel sends an IPX packet to tap0, it is passed to the application +Let's say that you configured IPv6 on the tap0, then whenever +the kernel sends an IPv6 packet to tap0, it is passed to the application (VTun for example). The application encrypts, compresses and sends it to the other side over TCP or UDP. The application on the other side decompresses and decrypts the data received and writes the packet to the TAP device, -- cgit v1.2.1 From 51650d33b2771acd505068da669cf85cffac369a Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Wed, 7 Aug 2019 01:45:40 +0300 Subject: net: sched: sch_taprio: fix memleak in error path for sched list parse In error case, all entries should be freed from the sched list before deleting it. For simplicity use rcu way. Fixes: 5a781ccbd19e46 ("tc: Add support for configuring the taprio scheduler") Acked-by: Vinicius Costa Gomes Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. 
Miller --- net/sched/sch_taprio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index c39db507ba3f..e25d414ae12f 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1195,7 +1195,8 @@ unlock: spin_unlock_bh(qdisc_lock(sch)); free_sched: - kfree(new_admin); + if (new_admin) + call_rcu(&new_admin->rcu, taprio_free_sched_cb); return err; } -- cgit v1.2.1 From d595b03de2cb0bdf9bcdf35ff27840cc3a37158f Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 7 Aug 2019 10:19:59 +0800 Subject: bonding: Add vlan tx offload to hw_enc_features As commit 30d8177e8ac7 ("bonding: Always enable vlan tx offload") said, we should always enable bonding's vlan tx offload, pass the vlan packets to the slave devices with vlan tci, and let them handle the vlan implementation. Now if encapsulation protocols like VXLAN are used, skb->encapsulation may be set, then the packet is passed to the vlan device which is based on the bonding device. However in netif_skb_features(), the check of hw_enc_features: if (skb->encapsulation) features &= dev->hw_enc_features; clears NETIF_F_HW_VLAN_CTAG_TX/NETIF_F_HW_VLAN_STAG_TX. This results in the same issue as in commit 30d8177e8ac7, like this: vlan_dev_hard_start_xmit -->dev_queue_xmit -->validate_xmit_skb -->netif_skb_features //NETIF_F_HW_VLAN_CTAG_TX is cleared -->validate_xmit_vlan -->__vlan_hwaccel_push_inside //skb->tci is cleared ... --> bond_start_xmit --> bond_xmit_hash //BOND_XMIT_POLICY_ENCAP34 --> __skb_flow_dissect // nhoff point to IP header --> case htons(ETH_P_8021Q) // skb_vlan_tag_present is false, so vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), //vlan point to ip header wrongly Fixes: b2a103e6d0af ("bonding: convert to ndo_fix_features") Signed-off-by: YueHaibing Acked-by: Jay Vosburgh Signed-off-by: David S.
Miller --- drivers/net/bonding/bond_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 02fd7822c14a..931d9d935686 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1126,6 +1126,8 @@ static void bond_compute_features(struct bonding *bond) done: bond_dev->vlan_features = vlan_features; bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX | NETIF_F_GSO_UDP_L4; bond_dev->mpls_features = mpls_features; bond_dev->gso_max_segs = gso_max_segs; -- cgit v1.2.1 From e3e3af9aa29a2ada43d5c27b47ea320415cd5bb3 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 7 Aug 2019 21:08:56 +0800 Subject: net: dsa: sja1105: remove set but not used variables 'tx_vid' and 'rx_vid' Fixes gcc '-Wunused-but-set-variable' warning: drivers/net/dsa/sja1105/sja1105_main.c: In function sja1105_fdb_dump: drivers/net/dsa/sja1105/sja1105_main.c:1226:14: warning: variable tx_vid set but not used [-Wunused-but-set-variable] drivers/net/dsa/sja1105/sja1105_main.c:1226:6: warning: variable rx_vid set but not used [-Wunused-but-set-variable] They are not used since commit 6d7c7d948a2e ("net: dsa: sja1105: Fix broken learning with vlan_filtering disabled") Reported-by: Hulk Robot Signed-off-by: YueHaibing Reviewed-by: Vladimir Oltean Reviewed-by: Vivien Didelot Signed-off-by: David S. 
Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index d073baffc20b..df976b259e43 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1223,12 +1223,8 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port, { struct sja1105_private *priv = ds->priv; struct device *dev = ds->dev; - u16 rx_vid, tx_vid; int i; - rx_vid = dsa_8021q_rx_vid(ds, port); - tx_vid = dsa_8021q_tx_vid(ds, port); - for (i = 0; i < SJA1105_MAX_L2_LOOKUP_COUNT; i++) { struct sja1105_l2_lookup_entry l2_lookup = {0}; u8 macaddr[ETH_ALEN]; -- cgit v1.2.1 From e1fea322fc6d4075254ca9c5f2afdace0281da2a Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Wed, 7 Aug 2019 15:57:28 -0400 Subject: net sched: update skbedit action for batched events operations Add get_fill_size() routine used to calculate the action size when building a batch of events. Fixes: ca9b0e27e ("pkt_action: add new action skbedit") Signed-off-by: Roman Mashak Signed-off-by: David S. 
Miller --- net/sched/act_skbedit.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index b100870f02a6..37dced00b63d 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -307,6 +307,17 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } +static size_t tcf_skbedit_get_fill_size(const struct tc_action *act) +{ + return nla_total_size(sizeof(struct tc_skbedit)) + + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_PRIORITY */ + + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING */ + + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MARK */ + + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_PTYPE */ + + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MASK */ + + nla_total_size_64bit(sizeof(u64)); /* TCA_SKBEDIT_FLAGS */ +} + static struct tc_action_ops act_skbedit_ops = { .kind = "skbedit", .id = TCA_ID_SKBEDIT, @@ -316,6 +327,7 @@ static struct tc_action_ops act_skbedit_ops = { .init = tcf_skbedit_init, .cleanup = tcf_skbedit_cleanup, .walk = tcf_skbedit_walker, + .get_fill_size = tcf_skbedit_get_fill_size, .lookup = tcf_skbedit_search, .size = sizeof(struct tcf_skbedit), }; -- cgit v1.2.1 From 7bc161846dcf4af0485f260930d17fdd892a4980 Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Wed, 7 Aug 2019 15:57:29 -0400 Subject: tc-testing: updated skbedit action tests with batch create/delete Update TDC tests with cases verifying the ability of TC to install or delete batches of skbedit actions. Signed-off-by: Roman Mashak Signed-off-by: David S. 
Miller --- .../tc-testing/tc-tests/actions/skbedit.json | 47 ++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json index bf5ebf59c2d4..9cdd2e31ac2c 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json @@ -670,5 +670,52 @@ "teardown": [ "$TC actions flush action skbedit" ] + }, + { + "id": "630c", + "name": "Add batch of 32 skbedit actions with all parameters and cookie", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action skbedit queue_mapping 2 priority 10 mark 7/0xaabbccdd ptype host inheritdsfield index \\$i cookie aabbccddeeff112233445566778800a1 \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "^[ \t]+index [0-9]+ ref", + "matchCount": "32", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "706d", + "name": "Delete batch of 32 skbedit actions with all parameters", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ], + "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action skbedit queue_mapping 2 priority 10 mark 7/0xaabbccdd ptype host inheritdsfield index \\$i \\\"; args=\\\"\\$args\\$cmd\\\"; done && $TC actions add \\$args\"" + ], + "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action skbedit index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "^[ \t]+index [0-9]+ ref", + "matchCount": "0", + "teardown": [] } ] -- cgit v1.2.1 From 
414776621d1006e57e80e6db7fdc3837897aaa64 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 7 Aug 2019 17:03:59 -0700 Subject: net/tls: prevent skb_orphan() from leaking TLS plain text with offload sk_validate_xmit_skb() and drivers depend on the sk member of struct sk_buff to identify segments requiring encryption. Any operation which removes or does not preserve the original TLS socket such as skb_orphan() or skb_clone() will cause clear text leaks. Make the TCP socket underlying an offloaded TLS connection mark all skbs as decrypted, if TLS TX is in offload mode. Then in sk_validate_xmit_skb() catch skbs which have no socket (or a socket with no validation) and decrypted flag set. Note that CONFIG_SOCK_VALIDATE_XMIT, CONFIG_TLS_DEVICE and sk->sk_validate_xmit_skb are slightly interchangeable right now, they all imply TLS offload. The new checks are guarded by CONFIG_TLS_DEVICE because that's the option guarding the sk_buff->decrypted member. Second, smaller issue with orphaning is that it breaks the guarantee that packets will be delivered to device queues in-order. All TLS offload drivers depend on that scheduling property. This means skb_orphan_partial()'s trick of preserving partial socket references will cause issues in the drivers. We need a full orphan, and as a result netem delay/throttling will cause all TLS offload skbs to be dropped. Reusing the sk_buff->decrypted flag also protects from leaking clear text when incoming, decrypted skb is redirected (e.g. by TC). See commit 0608c69c9a80 ("bpf: sk_msg, sock{map|hash} redirect through ULP") for justification why the internal flag is safe. The only location which could leak the flag in is tcp_bpf_sendmsg(), which is taken care of by clearing the previously unused bit. v2: - remove superfluous decrypted mark copy (Willem); - remove the stale doc entry (Boris); - rely entirely on EOR marking to prevent coalescing (Boris); - use an internal sendpages flag instead of marking the socket (Boris). 
v3 (Willem): - reorganize the can_skb_orphan_partial() condition; - fix the flag leak-in through tcp_bpf_sendmsg. Signed-off-by: Jakub Kicinski Acked-by: Willem de Bruijn Reviewed-by: Boris Pismenny Signed-off-by: David S. Miller --- Documentation/networking/tls-offload.rst | 18 ------------------ include/linux/skbuff.h | 8 ++++++++ include/linux/socket.h | 3 +++ include/net/sock.h | 10 +++++++++- net/core/sock.c | 19 ++++++++++++++----- net/ipv4/tcp.c | 3 +++ net/ipv4/tcp_bpf.c | 6 +++++- net/ipv4/tcp_output.c | 3 +++ net/tls/tls_device.c | 9 +++++++-- 9 files changed, 52 insertions(+), 27 deletions(-) diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst index b70b70dc4524..0dd3f748239f 100644 --- a/Documentation/networking/tls-offload.rst +++ b/Documentation/networking/tls-offload.rst @@ -506,21 +506,3 @@ Drivers should ignore the changes to TLS the device feature flags. These flags will be acted upon accordingly by the core ``ktls`` code. TLS device feature flags only control adding of new TLS connection offloads, old connections will remain active after flags are cleared. - -Known bugs -========== - -skb_orphan() leaks clear text ------------------------------ - -Currently drivers depend on the :c:member:`sk` member of -:c:type:`struct sk_buff ` to identify segments requiring -encryption. Any operation which removes or does not preserve the socket -association such as :c:func:`skb_orphan` or :c:func:`skb_clone` -will cause the driver to miss the packets and lead to clear text leaks. - -Redirects leak clear text -------------------------- - -In the RX direction, if segment has already been decrypted by the device -and it gets redirected or mirrored - clear text will be transmitted out. 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d8af86d995d6..ba5583522d24 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1374,6 +1374,14 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) to->l4_hash = from->l4_hash; }; +static inline void skb_copy_decrypted(struct sk_buff *to, + const struct sk_buff *from) +{ +#ifdef CONFIG_TLS_DEVICE + to->decrypted = from->decrypted; +#endif +} + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { diff --git a/include/linux/socket.h b/include/linux/socket.h index 97523818cb14..fc0bed59fc84 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -292,6 +292,9 @@ struct ucred { #define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */ #define MSG_EOF MSG_FIN #define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */ +#define MSG_SENDPAGE_DECRYPTED 0x100000 /* sendpage() internal : page may carry + * plain text and require encryption + */ #define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ diff --git a/include/net/sock.h b/include/net/sock.h index 228db3998e46..2c53f1a1d905 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2482,6 +2482,7 @@ static inline bool sk_fullsock(const struct sock *sk) /* Checks if this SKB belongs to an HW offloaded socket * and whether any SW fallbacks are required based on dev. + * Check decrypted mark in case skb_orphan() cleared socket. 
*/ static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) @@ -2489,8 +2490,15 @@ static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb, #ifdef CONFIG_SOCK_VALIDATE_XMIT struct sock *sk = skb->sk; - if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) + if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) { skb = sk->sk_validate_xmit_skb(sk, dev, skb); +#ifdef CONFIG_TLS_DEVICE + } else if (unlikely(skb->decrypted)) { + pr_warn_ratelimited("unencrypted skb with no associated socket - dropping\n"); + kfree_skb(skb); + skb = NULL; +#endif + } #endif return skb; diff --git a/net/core/sock.c b/net/core/sock.c index d57b0cc995a0..6d08553f885c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1992,6 +1992,19 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) } EXPORT_SYMBOL(skb_set_owner_w); +static bool can_skb_orphan_partial(const struct sk_buff *skb) +{ +#ifdef CONFIG_TLS_DEVICE + /* Drivers depend on in-order delivery for crypto offload, + * partial orphan breaks out-of-order-OK logic. + */ + if (skb->decrypted) + return false; +#endif + return (skb->destructor == sock_wfree || + (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree)); +} + /* This helper is used by netem, as it can hold packets in its * delay queue. We want to allow the owner socket to send more * packets, as if they were already TX completed by a typical driver. 
@@ -2003,11 +2016,7 @@ void skb_orphan_partial(struct sk_buff *skb) if (skb_is_tcp_pure_ack(skb)) return; - if (skb->destructor == sock_wfree -#ifdef CONFIG_INET - || skb->destructor == tcp_wfree -#endif - ) { + if (can_skb_orphan_partial(skb)) { struct sock *sk = skb->sk; if (refcount_inc_not_zero(&sk->sk_refcnt)) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 776905899ac0..77b485d60b9d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -984,6 +984,9 @@ new_segment: if (!skb) goto wait_for_memory; +#ifdef CONFIG_TLS_DEVICE + skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED); +#endif skb_entail(sk, skb); copy = size_goal; } diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 3d1e15401384..8a56e09cfb0e 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -398,10 +398,14 @@ more_data: static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct sk_msg tmp, *msg_tx = NULL; - int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS; int copied = 0, err = 0; struct sk_psock *psock; long timeo; + int flags; + + /* Don't let internal do_tcp_sendpages() flags through */ + flags = (msg->msg_flags & ~MSG_SENDPAGE_DECRYPTED); + flags |= MSG_NO_SHARED_FRAGS; psock = sk_psock_get(sk); if (unlikely(!psock)) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6e4afc48d7bb..979520e46e33 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1320,6 +1320,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, buff = sk_stream_alloc_skb(sk, nsize, gfp, true); if (!buff) return -ENOMEM; /* We'll just try again later. 
*/ + skb_copy_decrypted(buff, skb); sk->sk_wmem_queued += buff->truesize; sk_mem_charge(sk, buff->truesize); @@ -1874,6 +1875,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, buff = sk_stream_alloc_skb(sk, 0, gfp, true); if (unlikely(!buff)) return -ENOMEM; + skb_copy_decrypted(buff, skb); sk->sk_wmem_queued += buff->truesize; sk_mem_charge(sk, buff->truesize); @@ -2143,6 +2145,7 @@ static int tcp_mtu_probe(struct sock *sk) sk_mem_charge(sk, nskb->truesize); skb = tcp_send_head(sk); + skb_copy_decrypted(nskb, skb); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 7c0b2b778703..43922d86e510 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -373,9 +373,9 @@ static int tls_push_data(struct sock *sk, struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx); - int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST; int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE); struct tls_record_info *record = ctx->open_record; + int tls_push_record_flags; struct page_frag *pfrag; size_t orig_size = size; u32 max_open_record_len; @@ -390,6 +390,9 @@ static int tls_push_data(struct sock *sk, if (sk->sk_err) return -sk->sk_err; + flags |= MSG_SENDPAGE_DECRYPTED; + tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST; + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); if (tls_is_partially_sent_record(tls_ctx)) { rc = tls_push_partial_record(sk, tls_ctx, flags); @@ -576,7 +579,9 @@ void tls_device_write_space(struct sock *sk, struct tls_context *ctx) gfp_t sk_allocation = sk->sk_allocation; sk->sk_allocation = GFP_ATOMIC; - tls_push_partial_record(sk, ctx, MSG_DONTWAIT | MSG_NOSIGNAL); + tls_push_partial_record(sk, ctx, + MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_SENDPAGE_DECRYPTED); sk->sk_allocation = 
sk_allocation; } } -- cgit v1.2.1 From 227f2f030e28d8783c3d10ce70ff4ba79cad653f Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 8 Aug 2019 14:22:47 +0800 Subject: team: Add vlan tx offload to hw_enc_features We should also enable team's vlan tx offload in hw_enc_features, pass the vlan packets to the slave devices with vlan tci, let the slave handle vlan tunneling offload implementation. Fixes: 3268e5cb494d ("team: Advertise tunneling offload features") Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- drivers/net/team/team.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index abfa0da9bbd2..e8089def5a46 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1004,6 +1004,8 @@ static void __team_compute_features(struct team *team) team->dev->vlan_features = vlan_features; team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX | NETIF_F_GSO_UDP_L4; team->dev->hard_header_len = max_hard_header_len; -- cgit v1.2.1 From 8c25d0887a8bd0e1ca2074ac0c6dff173787a83b Mon Sep 17 00:00:00 2001 From: Fuqian Huang Date: Fri, 9 Aug 2019 13:35:39 +0800 Subject: net: tundra: tsi108: use spin_lock_irqsave instead of spin_lock_irq in IRQ context spin_unlock_irq() enables interrupts when it returns. The function tsi108_stat_carry() is called from the interrupt handler tsi108_irq(), so calling spin_unlock_irq() there enables interrupts inside the interrupt handler. Use spin_lock_irqsave/spin_unlock_irqrestore instead of spin_(un)lock_irq in IRQ context to avoid this. Signed-off-by: Fuqian Huang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/tundra/tsi108_eth.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index 78a7de3fb622..c62f474b6d08 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -371,9 +371,10 @@ tsi108_stat_carry_one(int carry, int carry_bit, int carry_shift, static void tsi108_stat_carry(struct net_device *dev) { struct tsi108_prv_data *data = netdev_priv(dev); + unsigned long flags; u32 carry1, carry2; - spin_lock_irq(&data->misclock); + spin_lock_irqsave(&data->misclock, flags); carry1 = TSI_READ(TSI108_STAT_CARRY1); carry2 = TSI_READ(TSI108_STAT_CARRY2); @@ -441,7 +442,7 @@ static void tsi108_stat_carry(struct net_device *dev) TSI108_STAT_TXPAUSEDROP_CARRY, &data->tx_pause_drop); - spin_unlock_irq(&data->misclock); + spin_unlock_irqrestore(&data->misclock, flags); } /* Read a stat counter atomically with respect to carries. -- cgit v1.2.1 From 1109635b292c82e7a2aa15e38edb7c389e34b693 Mon Sep 17 00:00:00 2001 From: "Lowry Li (Arm Technology China)" Date: Fri, 2 Aug 2019 08:07:52 +0000 Subject: drm/komeda: Initialize and enable output polling on Komeda Initialize and enable output polling on Komeda. Changes since v1: 1. Enable the polling before registering the driver; 2. Disable the polling after unregistering the driver. Changes since v2: 1. If driver register failed, disable the polling. 
Signed-off-by: Lowry Li (Arm Technology China) Reviewed-by: James Qian Wang (Arm Technology China) Signed-off-by: james qian wang (Arm Technology China) Link: https://patchwork.freedesktop.org/patch/msgid/1564733249-24329-1-git-send-email-lowry.li@arm.com --- drivers/gpu/drm/arm/display/komeda/komeda_kms.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c index 419a8b0e5de8..d50e75f0b2bd 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "komeda_dev.h" #include "komeda_framebuffer.h" @@ -315,6 +316,8 @@ struct komeda_kms_dev *komeda_kms_attach(struct komeda_dev *mdev) drm->irq_enabled = true; + drm_kms_helper_poll_init(drm); + err = drm_dev_register(drm, 0); if (err) goto cleanup_mode_config; @@ -322,6 +325,7 @@ struct komeda_kms_dev *komeda_kms_attach(struct komeda_dev *mdev) return kms; cleanup_mode_config: + drm_kms_helper_poll_fini(drm); drm->irq_enabled = false; drm_mode_config_cleanup(drm); komeda_kms_cleanup_private_objs(kms); @@ -338,6 +342,7 @@ void komeda_kms_detach(struct komeda_kms_dev *kms) drm->irq_enabled = false; mdev->funcs->disable_irq(mdev); drm_dev_unregister(drm); + drm_kms_helper_poll_fini(drm); component_unbind_all(mdev->dev, drm); komeda_kms_cleanup_private_objs(kms); drm_mode_config_cleanup(drm); -- cgit v1.2.1 From 6a0a8d10a3661a036b55af695542a714c429ab7c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Aug 2019 11:01:27 +0200 Subject: netfilter: nf_tables: use-after-free in failing rule with bound set If a rule that has already a bound anonymous set fails to be added, the preparation phase releases the rule and the bound set. However, the transaction object from the abort path still has a reference to the set object that is stale, leading to a use-after-free when checking for the set->bound field. 
Add a new field to the transaction that specifies if the set is bound, so the abort path can skip releasing it since the rule command owns it and it takes care of releasing it. After this update, the set->bound field is removed. [ 24.649883] Unable to handle kernel paging request at virtual address 0000000000040434 [ 24.657858] Mem abort info: [ 24.660686] ESR = 0x96000004 [ 24.663769] Exception class = DABT (current EL), IL = 32 bits [ 24.669725] SET = 0, FnV = 0 [ 24.672804] EA = 0, S1PTW = 0 [ 24.675975] Data abort info: [ 24.678880] ISV = 0, ISS = 0x00000004 [ 24.682743] CM = 0, WnR = 0 [ 24.685723] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000428952000 [ 24.692207] [0000000000040434] pgd=0000000000000000 [ 24.697119] Internal error: Oops: 96000004 [#1] SMP [...] [ 24.889414] Call trace: [ 24.891870] __nf_tables_abort+0x3f0/0x7a0 [ 24.895984] nf_tables_abort+0x20/0x40 [ 24.899750] nfnetlink_rcv_batch+0x17c/0x588 [ 24.904037] nfnetlink_rcv+0x13c/0x190 [ 24.907803] netlink_unicast+0x18c/0x208 [ 24.911742] netlink_sendmsg+0x1b0/0x350 [ 24.915682] sock_sendmsg+0x4c/0x68 [ 24.919185] ___sys_sendmsg+0x288/0x2c8 [ 24.923037] __sys_sendmsg+0x7c/0xd0 [ 24.926628] __arm64_sys_sendmsg+0x2c/0x38 [ 24.930744] el0_svc_common.constprop.0+0x94/0x158 [ 24.935556] el0_svc_handler+0x34/0x90 [ 24.939322] el0_svc+0x8/0xc [ 24.942216] Code: 37280300 f9404023 91014262 aa1703e0 (f9401863) [ 24.948336] ---[ end trace cebbb9dcbed3b56f ]--- Fixes: f6ac85858976 ("netfilter: nf_tables: unbind set in rule from commit path") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 9 +++++++-- net/netfilter/nf_tables_api.c | 15 ++++++++++----- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 9b624566b82d..475d6f28ca67 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -421,8 +421,7 @@ struct nft_set { unsigned char *udata; /* runtime data below 
here */ const struct nft_set_ops *ops ____cacheline_aligned; - u16 flags:13, - bound:1, + u16 flags:14, genmask:2; u8 klen; u8 dlen; @@ -1348,12 +1347,15 @@ struct nft_trans_rule { struct nft_trans_set { struct nft_set *set; u32 set_id; + bool bound; }; #define nft_trans_set(trans) \ (((struct nft_trans_set *)trans->data)->set) #define nft_trans_set_id(trans) \ (((struct nft_trans_set *)trans->data)->set_id) +#define nft_trans_set_bound(trans) \ + (((struct nft_trans_set *)trans->data)->bound) struct nft_trans_chain { bool update; @@ -1384,12 +1386,15 @@ struct nft_trans_table { struct nft_trans_elem { struct nft_set *set; struct nft_set_elem elem; + bool bound; }; #define nft_trans_elem_set(trans) \ (((struct nft_trans_elem *)trans->data)->set) #define nft_trans_elem(trans) \ (((struct nft_trans_elem *)trans->data)->elem) +#define nft_trans_elem_set_bound(trans) \ + (((struct nft_trans_elem *)trans->data)->bound) struct nft_trans_obj { struct nft_object *obj; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 605a7cfe7ca7..88abbddf8967 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -138,9 +138,14 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) return; list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { - if (trans->msg_type == NFT_MSG_NEWSET && - nft_trans_set(trans) == set) { - set->bound = true; + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (nft_trans_set(trans) == set) + nft_trans_set_bound(trans) = true; + break; + case NFT_MSG_NEWSETELEM: + if (nft_trans_elem_set(trans) == set) + nft_trans_elem_set_bound(trans) = true; break; } } @@ -6906,7 +6911,7 @@ static int __nf_tables_abort(struct net *net) break; case NFT_MSG_NEWSET: trans->ctx.table->use--; - if (nft_trans_set(trans)->bound) { + if (nft_trans_set_bound(trans)) { nft_trans_destroy(trans); break; } @@ -6918,7 +6923,7 @@ static int __nf_tables_abort(struct net *net) 
nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: - if (nft_trans_elem_set(trans)->bound) { + if (nft_trans_elem_set_bound(trans)) { nft_trans_destroy(trans); break; } -- cgit v1.2.1 From 3e68db2f6422d711550a32cbc87abd97bb6efab3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Aug 2019 11:01:33 +0200 Subject: netfilter: nf_flow_table: conntrack picks up expired flows Update conntrack entry to pick up expired flows, otherwise the conntrack entry gets stuck with the internal offload timeout (one day). The TCP state also needs to be adjusted to ESTABLISHED state and tracking is set to liberal mode in order to give conntrack a chance to pick up the expired flow. Fixes: ac2a66665e23 ("netfilter: add generic flow table infrastructure") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index e3d797252a98..68a24471ffee 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -111,7 +111,7 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp) #define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ) #define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ) -static void flow_offload_fixup_ct_state(struct nf_conn *ct) +static void flow_offload_fixup_ct(struct nf_conn *ct) { const struct nf_conntrack_l4proto *l4proto; unsigned int timeout; @@ -208,6 +208,11 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) } EXPORT_SYMBOL_GPL(flow_offload_add); +static inline bool nf_flow_has_expired(const struct flow_offload *flow) +{ + return (__s32)(flow->timeout - (u32)jiffies) <= 0; +} + static void flow_offload_del(struct nf_flowtable *flow_table, struct flow_offload *flow) { @@ -223,6 +228,9 @@ static void flow_offload_del(struct nf_flowtable *flow_table, e = container_of(flow, struct flow_offload_entry, flow); 
clear_bit(IPS_OFFLOAD_BIT, &e->ct->status); + if (nf_flow_has_expired(flow)) + flow_offload_fixup_ct(e->ct); + flow_offload_free(flow); } @@ -233,7 +241,7 @@ void flow_offload_teardown(struct flow_offload *flow) flow->flags |= FLOW_OFFLOAD_TEARDOWN; e = container_of(flow, struct flow_offload_entry, flow); - flow_offload_fixup_ct_state(e->ct); + flow_offload_fixup_ct(e->ct); } EXPORT_SYMBOL_GPL(flow_offload_teardown); @@ -298,11 +306,6 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table, return err; } -static inline bool nf_flow_has_expired(const struct flow_offload *flow) -{ - return (__s32)(flow->timeout - (u32)jiffies) <= 0; -} - static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data) { struct nf_flowtable *flow_table = data; -- cgit v1.2.1 From 1e5b2471bcc4838df298080ae1ec042c2cbc9ce9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Aug 2019 11:01:35 +0200 Subject: netfilter: nf_flow_table: teardown flow timeout race Flows that are in teardown state (due to RST / FIN TCP packet) still have their offload flag set on. Hence, the conntrack garbage collector may race to undo the timeout adjustment that the fixup routine performs, leaving the conntrack entry in place with the internal offload timeout (one day). Update teardown flow state to ESTABLISHED and set tracking to liberal, then once the offload bit is cleared, adjust timeout if it is more than the default fixup timeout (conntrack might already have set a lower timeout from the packet path). 
Fixes: da5984e51063 ("netfilter: nf_flow_table: add support for sending flows back to the slow path") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 68a24471ffee..80a8f9ae4c93 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -111,15 +111,16 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp) #define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ) #define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ) -static void flow_offload_fixup_ct(struct nf_conn *ct) +static inline __s32 nf_flow_timeout_delta(unsigned int timeout) +{ + return (__s32)(timeout - (u32)jiffies); +} + +static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) { const struct nf_conntrack_l4proto *l4proto; + int l4num = nf_ct_protonum(ct); unsigned int timeout; - int l4num; - - l4num = nf_ct_protonum(ct); - if (l4num == IPPROTO_TCP) - flow_offload_fixup_tcp(&ct->proto.tcp); l4proto = nf_ct_l4proto_find(l4num); if (!l4proto) @@ -132,7 +133,20 @@ static void flow_offload_fixup_ct(struct nf_conn *ct) else return; - ct->timeout = nfct_time_stamp + timeout; + if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout) + ct->timeout = nfct_time_stamp + timeout; +} + +static void flow_offload_fixup_ct_state(struct nf_conn *ct) +{ + if (nf_ct_protonum(ct) == IPPROTO_TCP) + flow_offload_fixup_tcp(&ct->proto.tcp); +} + +static void flow_offload_fixup_ct(struct nf_conn *ct) +{ + flow_offload_fixup_ct_state(ct); + flow_offload_fixup_ct_timeout(ct); } void flow_offload_free(struct flow_offload *flow) @@ -210,7 +224,7 @@ EXPORT_SYMBOL_GPL(flow_offload_add); static inline bool nf_flow_has_expired(const struct flow_offload *flow) { - return (__s32)(flow->timeout - (u32)jiffies) <= 0; + return nf_flow_timeout_delta(flow->timeout) <= 0; } static void flow_offload_del(struct 
nf_flowtable *flow_table, @@ -230,6 +244,8 @@ static void flow_offload_del(struct nf_flowtable *flow_table, if (nf_flow_has_expired(flow)) flow_offload_fixup_ct(e->ct); + else if (flow->flags & FLOW_OFFLOAD_TEARDOWN) + flow_offload_fixup_ct_timeout(e->ct); flow_offload_free(flow); } @@ -241,7 +257,7 @@ void flow_offload_teardown(struct flow_offload *flow) flow->flags |= FLOW_OFFLOAD_TEARDOWN; e = container_of(flow, struct flow_offload_entry, flow); - flow_offload_fixup_ct(e->ct); + flow_offload_fixup_ct_state(e->ct); } EXPORT_SYMBOL_GPL(flow_offload_teardown); -- cgit v1.2.1 From 730c5fd42c1e3652a065448fd235cb9fafb2bd10 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 9 Aug 2019 15:20:41 +0100 Subject: rxrpc: Fix local endpoint refcounting The object lifetime management on the rxrpc_local struct is broken in that the rxrpc_local_processor() function is expected to clean up and remove an object - but it may get requeued by packets coming in on the backing UDP socket once it starts running. This may result in the assertion in rxrpc_local_rcu() firing because the memory has been scheduled for RCU destruction whilst still queued: rxrpc: Assertion failed ------------[ cut here ]------------ kernel BUG at net/rxrpc/local_object.c:468! Note that if the processor comes around before the RCU free function, it will just do nothing because ->dead is true. Fix this by adding a separate refcount to count active users of the endpoint that causes the endpoint to be destroyed when it reaches 0. The original refcount can then be used to refcount objects through the work processor and cause the memory to be rcu freed when that reaches 0. 
Fixes: 4f95dd78a77e ("rxrpc: Rework local endpoint management") Reported-by: syzbot+1e0edc4b8b7494c28450@syzkaller.appspotmail.com Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 4 +-- net/rxrpc/ar-internal.h | 5 ++- net/rxrpc/input.c | 16 ++++++--- net/rxrpc/local_object.c | 86 ++++++++++++++++++++++++++++++------------------ 4 files changed, 72 insertions(+), 39 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index d09eaf153544..8c9bd3ae9edf 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -193,7 +193,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) service_in_use: write_unlock(&local->services_lock); - rxrpc_put_local(local); + rxrpc_unuse_local(local); ret = -EADDRINUSE; error_unlock: release_sock(&rx->sk); @@ -901,7 +901,7 @@ static int rxrpc_release_sock(struct sock *sk) rxrpc_queue_work(&rxnet->service_conn_reaper); rxrpc_queue_work(&rxnet->client_conn_reaper); - rxrpc_put_local(rx->local); + rxrpc_unuse_local(rx->local); rx->local = NULL; key_put(rx->key); rx->key = NULL; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 822f45386e31..9796c45d2f6a 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -254,7 +254,8 @@ struct rxrpc_security { */ struct rxrpc_local { struct rcu_head rcu; - atomic_t usage; + atomic_t active_users; /* Number of users of the local endpoint */ + atomic_t usage; /* Number of references to the structure */ struct rxrpc_net *rxnet; /* The network ns in which this resides */ struct list_head link; struct socket *socket; /* my UDP socket */ @@ -1002,6 +1003,8 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *); struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *); void rxrpc_put_local(struct rxrpc_local *); +struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *); +void rxrpc_unuse_local(struct rxrpc_local *); void 
rxrpc_queue_local(struct rxrpc_local *); void rxrpc_destroy_all_locals(struct rxrpc_net *); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 5bd6f1546e5c..ee95d1cd1cdf 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1108,8 +1108,12 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local, { _enter("%p,%p", local, skb); - skb_queue_tail(&local->event_queue, skb); - rxrpc_queue_local(local); + if (rxrpc_get_local_maybe(local)) { + skb_queue_tail(&local->event_queue, skb); + rxrpc_queue_local(local); + } else { + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + } } /* @@ -1119,8 +1123,12 @@ static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) { CHECK_SLAB_OKAY(&local->usage); - skb_queue_tail(&local->reject_queue, skb); - rxrpc_queue_local(local); + if (rxrpc_get_local_maybe(local)) { + skb_queue_tail(&local->reject_queue, skb); + rxrpc_queue_local(local); + } else { + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + } } /* diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index b1c71bad510b..9798159ee65f 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -79,6 +79,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL); if (local) { atomic_set(&local->usage, 1); + atomic_set(&local->active_users, 1); local->rxnet = rxnet; INIT_LIST_HEAD(&local->link); INIT_WORK(&local->processor, rxrpc_local_processor); @@ -266,11 +267,8 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, * bind the transport socket may still fail if we're attempting * to use a local address that the dying object is still using. 
*/ - if (!rxrpc_get_local_maybe(local)) { - cursor = cursor->next; - list_del_init(&local->link); + if (!rxrpc_use_local(local)) break; - } age = "old"; goto found; @@ -284,7 +282,10 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, if (ret < 0) goto sock_error; - list_add_tail(&local->link, cursor); + if (cursor != &rxnet->local_endpoints) + list_replace(cursor, &local->link); + else + list_add_tail(&local->link, cursor); age = "new"; found: @@ -342,7 +343,8 @@ struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local) } /* - * Queue a local endpoint. + * Queue a local endpoint unless it has become unreferenced and pass the + * caller's reference to the work item. */ void rxrpc_queue_local(struct rxrpc_local *local) { @@ -351,15 +353,8 @@ void rxrpc_queue_local(struct rxrpc_local *local) if (rxrpc_queue_work(&local->processor)) trace_rxrpc_local(local, rxrpc_local_queued, atomic_read(&local->usage), here); -} - -/* - * A local endpoint reached its end of life. - */ -static void __rxrpc_put_local(struct rxrpc_local *local) -{ - _enter("%d", local->debug_id); - rxrpc_queue_work(&local->processor); + else + rxrpc_put_local(local); } /* @@ -375,10 +370,45 @@ void rxrpc_put_local(struct rxrpc_local *local) trace_rxrpc_local(local, rxrpc_local_put, n, here); if (n == 0) - __rxrpc_put_local(local); + call_rcu(&local->rcu, rxrpc_local_rcu); } } +/* + * Start using a local endpoint. + */ +struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local) +{ + unsigned int au; + + local = rxrpc_get_local_maybe(local); + if (!local) + return NULL; + + au = atomic_fetch_add_unless(&local->active_users, 1, 0); + if (au == 0) { + rxrpc_put_local(local); + return NULL; + } + + return local; +} + +/* + * Cease using a local endpoint. Once the number of active users reaches 0, we + * start the closure of the transport in the work processor. 
+ */ +void rxrpc_unuse_local(struct rxrpc_local *local) +{ + unsigned int au; + + au = atomic_dec_return(&local->active_users); + if (au == 0) + rxrpc_queue_local(local); + else + rxrpc_put_local(local); +} + /* * Destroy a local endpoint's socket and then hand the record to RCU to dispose * of. @@ -393,16 +423,6 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) _enter("%d", local->debug_id); - /* We can get a race between an incoming call packet queueing the - * processor again and the work processor starting the destruction - * process which will shut down the UDP socket. - */ - if (local->dead) { - _leave(" [already dead]"); - return; - } - local->dead = true; - mutex_lock(&rxnet->local_mutex); list_del_init(&local->link); mutex_unlock(&rxnet->local_mutex); @@ -422,13 +442,11 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) */ rxrpc_purge_queue(&local->reject_queue); rxrpc_purge_queue(&local->event_queue); - - _debug("rcu local %d", local->debug_id); - call_rcu(&local->rcu, rxrpc_local_rcu); } /* - * Process events on an endpoint + * Process events on an endpoint. The work item carries a ref which + * we must release. 
*/ static void rxrpc_local_processor(struct work_struct *work) { @@ -441,8 +459,10 @@ static void rxrpc_local_processor(struct work_struct *work) do { again = false; - if (atomic_read(&local->usage) == 0) - return rxrpc_local_destroyer(local); + if (atomic_read(&local->active_users) == 0) { + rxrpc_local_destroyer(local); + break; + } if (!skb_queue_empty(&local->reject_queue)) { rxrpc_reject_packets(local); @@ -454,6 +474,8 @@ static void rxrpc_local_processor(struct work_struct *work) again = true; } } while (again); + + rxrpc_put_local(local); } /* -- cgit v1.2.1 From e8c3af6bb33a9e4b56920ee00aef92eb5e4cf485 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 9 Aug 2019 15:20:41 +0100 Subject: rxrpc: Don't bother generating maxSkew in the ACK packet Don't bother generating maxSkew in the ACK packet as it has been obsolete since AFS 3.1. Signed-off-by: David Howells Reviewed-by: Jeffrey Altman --- net/rxrpc/af_rxrpc.c | 2 +- net/rxrpc/ar-internal.h | 3 +-- net/rxrpc/call_event.c | 15 ++++++--------- net/rxrpc/input.c | 43 ++++++++++++++++--------------------------- net/rxrpc/output.c | 3 +-- net/rxrpc/recvmsg.c | 6 +++--- 6 files changed, 28 insertions(+), 44 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 8c9bd3ae9edf..0dbbfd1b6487 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -402,7 +402,7 @@ EXPORT_SYMBOL(rxrpc_kernel_check_life); */ void rxrpc_kernel_probe_life(struct socket *sock, struct rxrpc_call *call) { - rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false, rxrpc_propose_ack_ping_for_check_life); rxrpc_send_ack_packet(call, true, NULL); } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 9796c45d2f6a..145335611af6 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -650,7 +650,6 @@ struct rxrpc_call { /* receive-phase ACK management */ u8 ackr_reason; /* reason to ACK */ - u16 ackr_skew; /* skew on packet 
being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ rxrpc_serial_t ackr_first_seq; /* first sequence number received */ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ @@ -744,7 +743,7 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ -void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, +void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool, enum rxrpc_propose_ack_trace); void rxrpc_process_call(struct work_struct *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index bc2adeb3acb9..c767679bfa5d 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -43,8 +43,7 @@ static void rxrpc_propose_ping(struct rxrpc_call *call, * propose an ACK be sent */ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u16 skew, u32 serial, bool immediate, - bool background, + u32 serial, bool immediate, bool background, enum rxrpc_propose_ack_trace why) { enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; @@ -69,14 +68,12 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) { outcome = rxrpc_propose_ack_update; call->ackr_serial = serial; - call->ackr_skew = skew; } if (!immediate) goto trace; } else if (prior > rxrpc_ack_priority[call->ackr_reason]) { call->ackr_reason = ack_reason; call->ackr_serial = serial; - call->ackr_skew = skew; } else { outcome = rxrpc_propose_ack_subsume; } @@ -137,11 +134,11 @@ trace: * propose an ACK be sent, locking the call structure */ void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u16 skew, u32 serial, bool immediate, bool background, + u32 serial, bool immediate, bool background, enum rxrpc_propose_ack_trace why) { spin_lock_bh(&call->lock); - __rxrpc_propose_ACK(call, ack_reason, skew, serial, + __rxrpc_propose_ACK(call, ack_reason, serial, immediate, background, why); spin_unlock_bh(&call->lock); } @@ -239,7 +236,7 
@@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) ack_ts = ktime_sub(now, call->acks_latest_ts); if (ktime_to_ns(ack_ts) < call->peer->rtt) goto out; - rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false, rxrpc_propose_ack_ping_for_lost_ack); rxrpc_send_ack_packet(call, true, NULL); goto out; @@ -372,7 +369,7 @@ recheck_state: if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now); cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET); - rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, true, + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, true, rxrpc_propose_ack_ping_for_keepalive); set_bit(RXRPC_CALL_EV_PING, &call->events); } @@ -407,7 +404,7 @@ recheck_state: send_ack = NULL; if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) { call->acks_lost_top = call->tx_top; - rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false, rxrpc_propose_ack_ping_for_lost_ack); send_ack = &call->acks_lost_ping; } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index ee95d1cd1cdf..dd47d465d1d3 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -196,15 +196,14 @@ send_extra_data: * Ping the other end to fill our RTT cache and to retrieve the rwind * and MTU parameters. 
*/ -static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, - int skew) +static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); ktime_t now = skb->tstamp; if (call->peer->rtt_usage < 3 || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) - rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, + rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial, true, true, rxrpc_propose_ack_ping_for_params); } @@ -419,8 +418,7 @@ static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq, /* * Process a DATA packet, adding the packet to the Rx ring. */ -static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, - u16 skew) +static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); enum rxrpc_call_state state; @@ -600,11 +598,11 @@ skip: ack: if (ack) - rxrpc_propose_ACK(call, ack, skew, ack_serial, + rxrpc_propose_ACK(call, ack, ack_serial, immediate_ack, true, rxrpc_propose_ack_input_data); else - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, skew, serial, + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false, true, rxrpc_propose_ack_input_data); @@ -822,8 +820,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, * soft-ACK means that the packet may be discarded and retransmission * requested. A phase is complete when all packets are hard-ACK'd. 
*/ -static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, - u16 skew) +static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_ack_summary summary = { 0 }; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); @@ -867,11 +864,11 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, if (buf.ack.reason == RXRPC_ACK_PING) { _proto("Rx ACK %%%u PING Request", sp->hdr.serial); rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, - skew, sp->hdr.serial, true, true, + sp->hdr.serial, true, true, rxrpc_propose_ack_respond_to_ping); } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, - skew, sp->hdr.serial, true, true, + sp->hdr.serial, true, true, rxrpc_propose_ack_respond_to_ack); } @@ -948,7 +945,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, RXRPC_TX_ANNO_LAST && summary.nr_acks == call->tx_top - hard_ack && rxrpc_is_client_call(call)) - rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, + rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial, false, true, rxrpc_propose_ack_ping_for_lost_reply); @@ -1004,7 +1001,7 @@ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) * Process an incoming call packet. 
*/ static void rxrpc_input_call_packet(struct rxrpc_call *call, - struct sk_buff *skb, u16 skew) + struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned long timo; @@ -1023,11 +1020,11 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_DATA: - rxrpc_input_data(call, skb, skew); + rxrpc_input_data(call, skb); break; case RXRPC_PACKET_TYPE_ACK: - rxrpc_input_ack(call, skb, skew); + rxrpc_input_ack(call, skb); break; case RXRPC_PACKET_TYPE_BUSY: @@ -1181,7 +1178,6 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) struct rxrpc_peer *peer = NULL; struct rxrpc_sock *rx = NULL; unsigned int channel; - int skew = 0; _enter("%p", udp_sk); @@ -1309,15 +1305,8 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) goto out; } - /* Note the serial number skew here */ - skew = (int)sp->hdr.serial - (int)conn->hi_serial; - if (skew >= 0) { - if (skew > 0) - conn->hi_serial = sp->hdr.serial; - } else { - skew = -skew; - skew = min(skew, 65535); - } + if ((int)sp->hdr.serial - (int)conn->hi_serial > 0) + conn->hi_serial = sp->hdr.serial; /* Call-bound packets are routed by connection channel. 
*/ channel = sp->hdr.cid & RXRPC_CHANNELMASK; @@ -1380,11 +1369,11 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) call = rxrpc_new_incoming_call(local, rx, skb); if (!call) goto reject_packet; - rxrpc_send_ping(call, skb, skew); + rxrpc_send_ping(call, skb); mutex_unlock(&call->user_mutex); } - rxrpc_input_call_packet(call, skb, skew); + rxrpc_input_call_packet(call, skb); goto discard; discard: diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 948e3fe249ec..369e516c4bdf 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -87,7 +87,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, *_top = top; pkt->ack.bufferSpace = htons(8); - pkt->ack.maxSkew = htons(call->ackr_skew); + pkt->ack.maxSkew = htons(0); pkt->ack.firstPacket = htonl(hard_ack + 1); pkt->ack.previousPacket = htonl(call->ackr_prev_seq); pkt->ack.serial = htonl(serial); @@ -228,7 +228,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, if (ping) clear_bit(RXRPC_CALL_PINGING, &call->flags); rxrpc_propose_ACK(call, pkt->ack.reason, - ntohs(pkt->ack.maxSkew), ntohl(pkt->ack.serial), false, true, rxrpc_propose_ack_retry_tx); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 5abf46cf9e6c..9a7e1bc9791d 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -141,7 +141,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { - rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, false, true, + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, serial, false, true, rxrpc_propose_ack_terminal_ack); //rxrpc_send_ack_packet(call, false, NULL); } @@ -159,7 +159,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) call->state = RXRPC_CALL_SERVER_ACK_REQUEST; call->expect_req_by = jiffies + MAX_JIFFY_OFFSET; write_unlock_bh(&call->state_lock); - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, 
serial, false, true, + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false, true, rxrpc_propose_ack_processing_op); break; default: @@ -212,7 +212,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) if (after_eq(hard_ack, call->ackr_consumed + 2) || after_eq(top, call->ackr_seen + 2) || (hard_ack == top && after(hard_ack, call->ackr_consumed))) - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, true, true, rxrpc_propose_ack_rotate_rx); if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY) -- cgit v1.2.1 From b3e78adcbf991a4e8b2ebb23c9889e968ec76c5f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Aug 2019 17:19:22 -0700 Subject: tools: bpftool: fix error message (prog -> object) Change an error message to work for any object being pinned not just programs. Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool") Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 5215e0870bcb..c52a6ffb8949 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -237,7 +237,7 @@ int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) fd = get_fd_by_id(id); if (fd < 0) { - p_err("can't get prog by id (%u): %s", id, strerror(errno)); + p_err("can't open object by id (%u): %s", id, strerror(errno)); return -1; } -- cgit v1.2.1 From 3c7be384fe6da0d7b1d6fc0ad6b4a33edb73aad5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Aug 2019 17:19:23 -0700 Subject: tools: bpftool: add error message on pin failure No error message is currently printed if the pin syscall itself fails. It got lost in the loadall refactoring. 
Fixes: 77380998d91d ("bpftool: add loadall command") Reported-by: Andy Lutomirski Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/common.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index c52a6ffb8949..6a71324be628 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -204,7 +204,11 @@ int do_pin_fd(int fd, const char *name) if (err) return err; - return bpf_obj_pin(fd, name); + err = bpf_obj_pin(fd, name); + if (err) + p_err("can't pin the object (%s): %s", name, strerror(errno)); + + return err; } int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) -- cgit v1.2.1 From cd48bdda4fb82c2fe569d97af4217c530168c99c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 8 Aug 2019 13:57:25 +0200 Subject: sock: make cookie generation global instead of per netns Generating and retrieving socket cookies are a useful feature that is exposed to BPF for various program types through bpf_get_socket_cookie() helper. The fact that the cookie counter is per netns is quite a limitation for BPF in practice in particular for programs in host namespace that use socket cookies as part of a map lookup key since they will be causing socket cookie collisions e.g. when attached to BPF cgroup hooks or cls_bpf on tc egress in host namespace handling container traffic from veth or ipvlan devices with peer in different netns. Change the counter to be global instead. Socket cookie consumers must assume the value as opaque in any case. Not every socket must have a cookie generated and knowledge of the counter value itself does not provide much value either way hence conversion to global is fine. Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Cc: Alexei Starovoitov Cc: Willem de Bruijn Cc: Martynas Pumputis Signed-off-by: David S.
Miller --- include/net/net_namespace.h | 1 - include/uapi/linux/bpf.h | 4 ++-- net/core/sock_diag.c | 3 ++- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 4a9da951a794..cb668bc2692d 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -61,7 +61,6 @@ struct net { spinlock_t rules_mod_lock; u32 hash_mix; - atomic64_t cookie_gen; struct list_head list; /* list of network namespaces */ struct list_head exit_list; /* To linked to call pernet exit diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index fa1c753dcdbc..a5aa7d3ac6a1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1466,8 +1466,8 @@ union bpf_attr { * If no cookie has been set yet, generate a new cookie. Once * generated, the socket cookie remains stable for the life of the * socket. This helper can be useful for monitoring per socket - * networking traffic statistics as it provides a unique socket - * identifier per namespace. + * networking traffic statistics as it provides a global socket + * identifier that can be assumed unique. * Return * A 8-byte long non-decreasing number on success, or 0 if the * socket field is missing inside *skb*. 
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 3312a5849a97..c13ffbd33d8d 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -19,6 +19,7 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX]; static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static DEFINE_MUTEX(sock_diag_table_mutex); static struct workqueue_struct *broadcast_wq; +static atomic64_t cookie_gen; u64 sock_gen_cookie(struct sock *sk) { @@ -27,7 +28,7 @@ u64 sock_gen_cookie(struct sock *sk) if (res) return res; - res = atomic64_inc_return(&sock_net(sk)->cookie_gen); + res = atomic64_inc_return(&cookie_gen); atomic64_cmpxchg(&sk->sk_cookie, 0, res); } } -- cgit v1.2.1 From 609a2ca57afc467fbc46b7f3453de4e1811456c5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 8 Aug 2019 13:57:26 +0200 Subject: bpf: sync bpf.h to tools infrastructure Pull in updates in BPF helper function description. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/include/uapi/linux/bpf.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4e455018da65..a5aa7d3ac6a1 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1466,8 +1466,8 @@ union bpf_attr { * If no cookie has been set yet, generate a new cookie. Once * generated, the socket cookie remains stable for the life of the * socket. This helper can be useful for monitoring per socket - * networking traffic statistics as it provides a unique socket - * identifier per namespace. + * networking traffic statistics as it provides a global socket + * identifier that can be assumed unique. * Return * A 8-byte long non-decreasing number on success, or 0 if the * socket field is missing inside *skb*. @@ -1571,8 +1571,11 @@ union bpf_attr { * but this is only implemented for native XDP (with driver * support) as of this writing). 
* - * All values for *flags* are reserved for future usage, and must - * be left at zero. + * The lower two bits of *flags* are used as the return code if + * the map lookup fails. This is so that the return value can be + * one of the XDP program return codes up to XDP_TX, as chosen by + * the caller. Any higher bits in the *flags* argument must be + * unset. * * When used to redirect packets to net devices, this helper * provides a high performance increase over **bpf_redirect**\ (). -- cgit v1.2.1 From 8b6381600d59871fbe44d36522272f961ab42410 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 8 Aug 2019 09:37:56 -0700 Subject: ixgbe: fix possible deadlock in ixgbe_service_task() ixgbe_service_task() calls unregister_netdev() under rtnl_lock(). But unregister_netdev() internally calls rtnl_lock(). So deadlock would occur. Fixes: 59dd45d550c5 ("ixgbe: firmware recovery mode") Signed-off-by: Taehee Yoo Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index cbaf712d6529..7882148abb43 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7897,11 +7897,8 @@ static void ixgbe_service_task(struct work_struct *work) return; } if (ixgbe_check_fw_error(adapter)) { - if (!test_bit(__IXGBE_DOWN, &adapter->state)) { - rtnl_lock(); + if (!test_bit(__IXGBE_DOWN, &adapter->state)) unregister_netdev(adapter->netdev); - rtnl_unlock(); - } ixgbe_service_event_complete(adapter); return; } -- cgit v1.2.1 From 6d0d779dca73cd5acb649c54f81401f93098b298 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 9 Aug 2019 01:58:08 +0000 Subject: hv_netvsc: Fix a warning of suspicious RCU usage This fixes a warning of "suspicious rcu_dereference_check() usage" when nload runs. 
Fixes: 776e726bfb34 ("netvsc: fix RCU warning in get_stats") Signed-off-by: Dexuan Cui Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 3544e1991579..e8fce6d715ef 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1239,12 +1239,15 @@ static void netvsc_get_stats64(struct net_device *net, struct rtnl_link_stats64 *t) { struct net_device_context *ndev_ctx = netdev_priv(net); - struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev); + struct netvsc_device *nvdev; struct netvsc_vf_pcpu_stats vf_tot; int i; + rcu_read_lock(); + + nvdev = rcu_dereference(ndev_ctx->nvdev); if (!nvdev) - return; + goto out; netdev_stats_to_stats64(t, &net->stats); @@ -1283,6 +1286,8 @@ static void netvsc_get_stats64(struct net_device *net, t->rx_packets += packets; t->multicast += multicast; } +out: + rcu_read_unlock(); } static int netvsc_set_mac_addr(struct net_device *ndev, void *p) -- cgit v1.2.1 From 57c722e932cfb82e9820bbaae1b1f7222ea97b52 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 9 Aug 2019 18:36:23 -0700 Subject: net/tls: swap sk_write_space on close Now that we swap the original proto and clear the ULP pointer on close we have to make sure no callback will try to access the freed state. sk_write_space is not part of sk_prot, remember to swap it. Reported-by: syzbot+dcdc9deefaec44785f32@syzkaller.appspotmail.com Fixes: 95fa145479fb ("bpf: sockmap/tls, close can race with map free") Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- net/tls/tls_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 9cbbae606ced..ce6ef56a65ef 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -308,6 +308,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) if (free_ctx) icsk->icsk_ulp_data = NULL; sk->sk_prot = ctx->sk_proto; + sk->sk_write_space = ctx->sk_write_space; write_unlock_bh(&sk->sk_callback_lock); release_sock(sk); if (ctx->tx_conf == TLS_SW) -- cgit v1.2.1 From 59c84b9fcf42c99a945d5fdc49220d854e539690 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 6 Aug 2019 12:15:17 -0700 Subject: netdevsim: Restore per-network namespace accounting for fib entries Prior to the commit in the fixes tag, the resource controller in netdevsim tracked fib entries and rules per network namespace. Restore that behavior. Fixes: 5fc494225c1e ("netdevsim: create devlink instance per netdevsim instance") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/netdevsim/dev.c | 63 ++++++++++------------- drivers/net/netdevsim/fib.c | 102 +++++++++++++++++++++++--------------- drivers/net/netdevsim/netdev.c | 9 +++- drivers/net/netdevsim/netdevsim.h | 10 ++-- 4 files changed, 98 insertions(+), 86 deletions(-) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index c5c417a3c0ce..bcc40a236624 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -73,46 +73,47 @@ static void nsim_dev_port_debugfs_exit(struct nsim_dev_port *nsim_dev_port) debugfs_remove_recursive(nsim_dev_port->ddir); } +static struct net *nsim_devlink_net(struct devlink *devlink) +{ + return &init_net; +} + static u64 nsim_dev_ipv4_fib_resource_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv; - return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, false); } static u64 
nsim_dev_ipv4_fib_rules_res_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv; - return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB_RULES, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, false); } static u64 nsim_dev_ipv6_fib_resource_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv; - return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, false); } static u64 nsim_dev_ipv6_fib_rules_res_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv; - return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB_RULES, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, false); } static int nsim_dev_resources_register(struct devlink *devlink) { - struct nsim_dev *nsim_dev = devlink_priv(devlink); struct devlink_resource_size_params params = { .size_max = (u64)-1, .size_granularity = 1, .unit = DEVLINK_RESOURCE_UNIT_ENTRY }; + struct net *net = nsim_devlink_net(devlink); int err; u64 n; @@ -126,8 +127,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) goto out; } - n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB, true); + n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, true); err = devlink_resource_register(devlink, "fib", n, NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4, &params); @@ -136,8 +136,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) return err; } - n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB_RULES, true); + n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, true); err = devlink_resource_register(devlink, "fib-rules", n, NSIM_RESOURCE_IPV4_FIB_RULES, NSIM_RESOURCE_IPV4, &params); @@ -156,8 +155,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) goto out; } - n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB, true); + n =
nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, true); err = devlink_resource_register(devlink, "fib", n, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6, &params); @@ -166,8 +164,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) return err; } - n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB_RULES, true); + n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, true); err = devlink_resource_register(devlink, "fib-rules", n, NSIM_RESOURCE_IPV6_FIB_RULES, NSIM_RESOURCE_IPV6, &params); @@ -179,19 +176,19 @@ static int nsim_dev_resources_register(struct devlink *devlink) devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV4_FIB, nsim_dev_ipv4_fib_resource_occ_get, - nsim_dev); + net); devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV4_FIB_RULES, nsim_dev_ipv4_fib_rules_res_occ_get, - nsim_dev); + net); devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV6_FIB, nsim_dev_ipv6_fib_resource_occ_get, - nsim_dev); + net); devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV6_FIB_RULES, nsim_dev_ipv6_fib_rules_res_occ_get, - nsim_dev); + net); out: return err; } @@ -199,11 +196,11 @@ out: static int nsim_dev_reload(struct devlink *devlink, struct netlink_ext_ack *extack) { - struct nsim_dev *nsim_dev = devlink_priv(devlink); enum nsim_resource_id res_ids[] = { NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES }; + struct net *net = nsim_devlink_net(devlink); int i; for (i = 0; i < ARRAY_SIZE(res_ids); ++i) { @@ -212,8 +209,7 @@ static int nsim_dev_reload(struct devlink *devlink, err = devlink_resource_size_get(devlink, res_ids[i], &val); if (!err) { - err = nsim_fib_set_max(nsim_dev->fib_data, - res_ids[i], val, extack); + err = nsim_fib_set_max(net, res_ids[i], val, extack); if (err) return err; } @@ -285,15 +281,9 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count) mutex_init(&nsim_dev->port_list_lock); nsim_dev->fw_update_status =
true; - nsim_dev->fib_data = nsim_fib_create(); - if (IS_ERR(nsim_dev->fib_data)) { - err = PTR_ERR(nsim_dev->fib_data); - goto err_devlink_free; - } - err = nsim_dev_resources_register(devlink); if (err) - goto err_fib_destroy; + goto err_devlink_free; err = devlink_register(devlink, &nsim_bus_dev->dev); if (err) @@ -315,8 +305,6 @@ err_dl_unregister: devlink_unregister(devlink); err_resources_unregister: devlink_resources_unregister(devlink, NULL); -err_fib_destroy: - nsim_fib_destroy(nsim_dev->fib_data); err_devlink_free: devlink_free(devlink); return ERR_PTR(err); @@ -330,7 +318,6 @@ static void nsim_dev_destroy(struct nsim_dev *nsim_dev) nsim_dev_debugfs_exit(nsim_dev); devlink_unregister(devlink); devlink_resources_unregister(devlink, NULL); - nsim_fib_destroy(nsim_dev->fib_data); mutex_destroy(&nsim_dev->port_list_lock); devlink_free(devlink); } diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index 8c57ba747772..f61d094746c0 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "netdevsim.h" @@ -32,14 +33,15 @@ struct nsim_per_fib_data { }; struct nsim_fib_data { - struct notifier_block fib_nb; struct nsim_per_fib_data ipv4; struct nsim_per_fib_data ipv6; }; -u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, - enum nsim_resource_id res_id, bool max) +static unsigned int nsim_fib_net_id; + +u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max) { + struct nsim_fib_data *fib_data = net_generic(net, nsim_fib_net_id); struct nsim_fib_entry *entry; switch (res_id) { @@ -62,10 +64,10 @@ u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, return max ? 
entry->max : entry->num; } -int nsim_fib_set_max(struct nsim_fib_data *fib_data, - enum nsim_resource_id res_id, u64 val, +int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val, struct netlink_ext_ack *extack) { + struct nsim_fib_data *fib_data = net_generic(net, nsim_fib_net_id); struct nsim_fib_entry *entry; int err = 0; @@ -118,9 +120,9 @@ static int nsim_fib_rule_account(struct nsim_fib_entry *entry, bool add, return err; } -static int nsim_fib_rule_event(struct nsim_fib_data *data, - struct fib_notifier_info *info, bool add) +static int nsim_fib_rule_event(struct fib_notifier_info *info, bool add) { + struct nsim_fib_data *data = net_generic(info->net, nsim_fib_net_id); struct netlink_ext_ack *extack = info->extack; int err = 0; @@ -155,9 +157,9 @@ static int nsim_fib_account(struct nsim_fib_entry *entry, bool add, return err; } -static int nsim_fib_event(struct nsim_fib_data *data, - struct fib_notifier_info *info, bool add) +static int nsim_fib_event(struct fib_notifier_info *info, bool add) { + struct nsim_fib_data *data = net_generic(info->net, nsim_fib_net_id); struct netlink_ext_ack *extack = info->extack; int err = 0; @@ -176,22 +178,18 @@ static int nsim_fib_event(struct nsim_fib_data *data, static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event, void *ptr) { - struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data, - fib_nb); struct fib_notifier_info *info = ptr; int err = 0; switch (event) { case FIB_EVENT_RULE_ADD: /* fall through */ case FIB_EVENT_RULE_DEL: - err = nsim_fib_rule_event(data, info, - event == FIB_EVENT_RULE_ADD); + err = nsim_fib_rule_event(info, event == FIB_EVENT_RULE_ADD); break; case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: - err = nsim_fib_event(data, info, - event == FIB_EVENT_ENTRY_ADD); + err = nsim_fib_event(info, event == FIB_EVENT_ENTRY_ADD); break; } @@ -201,23 +199,30 @@ static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long 
event, /* inconsistent dump, trying again */ static void nsim_fib_dump_inconsistent(struct notifier_block *nb) { - struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data, - fib_nb); + struct nsim_fib_data *data; + struct net *net; + + rcu_read_lock(); + for_each_net_rcu(net) { + data = net_generic(net, nsim_fib_net_id); + + data->ipv4.fib.num = 0ULL; + data->ipv4.rules.num = 0ULL; - data->ipv4.fib.num = 0ULL; - data->ipv4.rules.num = 0ULL; - data->ipv6.fib.num = 0ULL; - data->ipv6.rules.num = 0ULL; + data->ipv6.fib.num = 0ULL; + data->ipv6.rules.num = 0ULL; + } + rcu_read_unlock(); } -struct nsim_fib_data *nsim_fib_create(void) -{ - struct nsim_fib_data *data; - int err; +static struct notifier_block nsim_fib_nb = { + .notifier_call = nsim_fib_event_nb, +}; - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return ERR_PTR(-ENOMEM); +/* Initialize per network namespace state */ +static int __net_init nsim_fib_netns_init(struct net *net) +{ + struct nsim_fib_data *data = net_generic(net, nsim_fib_net_id); data->ipv4.fib.max = (u64)-1; data->ipv4.rules.max = (u64)-1; @@ -225,22 +230,37 @@ struct nsim_fib_data *nsim_fib_create(void) data->ipv6.fib.max = (u64)-1; data->ipv6.rules.max = (u64)-1; - data->fib_nb.notifier_call = nsim_fib_event_nb; - err = register_fib_notifier(&data->fib_nb, nsim_fib_dump_inconsistent); - if (err) { - pr_err("Failed to register fib notifier\n"); - goto err_out; - } + return 0; +} - return data; +static struct pernet_operations nsim_fib_net_ops = { + .init = nsim_fib_netns_init, + .id = &nsim_fib_net_id, + .size = sizeof(struct nsim_fib_data), +}; -err_out: - kfree(data); - return ERR_PTR(err); +void nsim_fib_exit(void) +{ + unregister_pernet_subsys(&nsim_fib_net_ops); + unregister_fib_notifier(&nsim_fib_nb); } -void nsim_fib_destroy(struct nsim_fib_data *data) +int nsim_fib_init(void) { - unregister_fib_notifier(&data->fib_nb); - kfree(data); + int err; + + err = register_pernet_subsys(&nsim_fib_net_ops); + if (err < 0) 
{ + pr_err("Failed to register pernet subsystem\n"); + goto err_out; + } + + err = register_fib_notifier(&nsim_fib_nb, nsim_fib_dump_inconsistent); + if (err < 0) { + pr_err("Failed to register fib notifier\n"); + goto err_out; + } + +err_out: + return err; } diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 0740940f41b1..55f57f76d01b 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -357,12 +357,18 @@ static int __init nsim_module_init(void) if (err) goto err_dev_exit; - err = rtnl_link_register(&nsim_link_ops); + err = nsim_fib_init(); if (err) goto err_bus_exit; + err = rtnl_link_register(&nsim_link_ops); + if (err) + goto err_fib_exit; + return 0; +err_fib_exit: + nsim_fib_exit(); err_bus_exit: nsim_bus_exit(); err_dev_exit: @@ -373,6 +379,7 @@ err_dev_exit: static void __exit nsim_module_exit(void) { rtnl_link_unregister(&nsim_link_ops); + nsim_fib_exit(); nsim_bus_exit(); nsim_dev_exit(); } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 79c05af2a7c0..9404637d34b7 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -169,12 +169,10 @@ int nsim_dev_port_add(struct nsim_bus_dev *nsim_bus_dev, int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_index); -struct nsim_fib_data *nsim_fib_create(void); -void nsim_fib_destroy(struct nsim_fib_data *fib_data); -u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, - enum nsim_resource_id res_id, bool max); -int nsim_fib_set_max(struct nsim_fib_data *fib_data, - enum nsim_resource_id res_id, u64 val, +int nsim_fib_init(void); +void nsim_fib_exit(void); +u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max); +int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val, struct netlink_ext_ack *extack); #if IS_ENABLED(CONFIG_XFRM_OFFLOAD) -- cgit v1.2.1 From 68553f1a6f746bf860bce3eb42d78c26a717d9c0 Mon Sep 17 00:00:00 
2001 From: David Howells Date: Fri, 9 Aug 2019 22:47:47 +0100 Subject: rxrpc: Fix local refcounting Fix rxrpc_unuse_local() to handle a NULL local pointer as it can be called on an unbound socket on which rx->local is not yet set. The following reproducer (includes omitted): int main(void) { socket(AF_RXRPC, SOCK_DGRAM, AF_INET); return 0; } causes the following oops to occur: BUG: kernel NULL pointer dereference, address: 0000000000000010 ... RIP: 0010:rxrpc_unuse_local+0x8/0x1b ... Call Trace: rxrpc_release+0x2b5/0x338 __sock_release+0x37/0xa1 sock_close+0x14/0x17 __fput+0x115/0x1e9 task_work_run+0x72/0x98 do_exit+0x51b/0xa7a ? __context_tracking_exit+0x4e/0x10e do_group_exit+0xab/0xab __x64_sys_exit_group+0x14/0x17 do_syscall_64+0x89/0x1d4 entry_SYSCALL_64_after_hwframe+0x49/0xbe Reported-by: syzbot+20dee719a2e090427b5f@syzkaller.appspotmail.com Fixes: 730c5fd42c1e ("rxrpc: Fix local endpoint refcounting") Signed-off-by: David Howells cc: Jeffrey Altman Signed-off-by: David S. Miller --- net/rxrpc/local_object.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 9798159ee65f..c9db3e762d8d 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -402,11 +402,13 @@ void rxrpc_unuse_local(struct rxrpc_local *local) { unsigned int au; - au = atomic_dec_return(&local->active_users); - if (au == 0) - rxrpc_queue_local(local); - else - rxrpc_put_local(local); + if (local) { + au = atomic_dec_return(&local->active_users); + if (au == 0) + rxrpc_queue_local(local); + else + rxrpc_put_local(local); + } } /* -- cgit v1.2.1 From d81f41411c2549b0ae42f23140d9589172096759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Sat, 10 Aug 2019 13:11:56 +0200 Subject: net: nps_enet: Fix function names in doc comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adjust the function names in two doc comments to match
the corresponding functions. Signed-off-by: Jonathan Neuschäfer Signed-off-by: David S. Miller --- drivers/net/ethernet/ezchip/nps_enet.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ezchip/nps_enet.h b/drivers/net/ethernet/ezchip/nps_enet.h index 133acca0bf31..092da2d90026 100644 --- a/drivers/net/ethernet/ezchip/nps_enet.h +++ b/drivers/net/ethernet/ezchip/nps_enet.h @@ -167,7 +167,7 @@ struct nps_enet_priv { }; /** - * nps_reg_set - Sets ENET register with provided value. + * nps_enet_reg_set - Sets ENET register with provided value. * @priv: Pointer to EZchip ENET private data structure. * @reg: Register offset from base address. * @value: Value to set in register. @@ -179,7 +179,7 @@ static inline void nps_enet_reg_set(struct nps_enet_priv *priv, } /** - * nps_reg_get - Gets value of specified ENET register. + * nps_enet_reg_get - Gets value of specified ENET register. * @priv: Pointer to EZchip ENET private data structure. * @reg: Register offset from base address. * -- cgit v1.2.1 From 8028ccda39bb440d86aee6948405c8337afbed8b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 11 Aug 2019 10:48:37 +0300 Subject: mlxsw: spectrum_ptp: Keep unmatched entries in a linked list To identify timestamps for matching with their packets, Spectrum-1 uses a five-tuple of (port, direction, domain number, message type, sequence ID). If there are several clients from the same domain behind a single port sending Delay_Req's, the only thing differentiating these packets, as far as Spectrum-1 is concerned, is the sequence ID. Should sequence IDs between individual clients be similar, conflicts may arise. That is not a problem to hardware, which will simply deliver timestamps on a first comes, first served basis. However the driver uses a simple hash table to store the unmatched pieces. When a new conflicting piece arrives, it pushes out the previously stored one, which if it is a packet, is delivered without timestamp. 
Later on as the corresponding timestamps arrive, the first one is mismatched to the second packet, and the second one is never matched and eventually is GCd. To correct this issue, instead of using a simple rhashtable, use rhltable to keep the unmatched entries. Previously, a found unmatched entry would always be removed from the hash table. That is not the case anymore--an incompatible entry is left in the hash table. Therefore removal from the hash table cannot be used to confirm the validity of the looked-up pointer, instead the lookup would simply need to be redone. Therefore move it inside the critical section. This simplifies a lot of the code. Fixes: 8748642751ed ("mlxsw: spectrum: PTP: Support SIOCGHWTSTAMP, SIOCSHWTSTAMP ioctls") Reported-by: Alex Veber Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c | 138 ++++++++------------- 1 file changed, 55 insertions(+), 83 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index 63b07edd9d81..38bb1cfe4e8c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -29,7 +29,7 @@ struct mlxsw_sp_ptp_state { struct mlxsw_sp *mlxsw_sp; - struct rhashtable unmatched_ht; + struct rhltable unmatched_ht; spinlock_t unmatched_lock; /* protects the HT */ struct delayed_work ht_gc_dw; u32 gc_cycle; @@ -45,7 +45,7 @@ struct mlxsw_sp1_ptp_key { struct mlxsw_sp1_ptp_unmatched { struct mlxsw_sp1_ptp_key key; - struct rhash_head ht_node; + struct rhlist_head ht_node; struct rcu_head rcu; struct sk_buff *skb; u64 timestamp; @@ -359,7 +359,7 @@ static int mlxsw_sp_ptp_parse(struct sk_buff *skb, /* Returns NULL on successful insertion, a pointer on conflict, or an ERR_PTR on * error. 
*/ -static struct mlxsw_sp1_ptp_unmatched * +static int mlxsw_sp1_ptp_unmatched_save(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp1_ptp_key key, struct sk_buff *skb, @@ -368,41 +368,51 @@ mlxsw_sp1_ptp_unmatched_save(struct mlxsw_sp *mlxsw_sp, int cycles = MLXSW_SP1_PTP_HT_GC_TIMEOUT / MLXSW_SP1_PTP_HT_GC_INTERVAL; struct mlxsw_sp_ptp_state *ptp_state = mlxsw_sp->ptp_state; struct mlxsw_sp1_ptp_unmatched *unmatched; - struct mlxsw_sp1_ptp_unmatched *conflict; + int err; unmatched = kzalloc(sizeof(*unmatched), GFP_ATOMIC); if (!unmatched) - return ERR_PTR(-ENOMEM); + return -ENOMEM; unmatched->key = key; unmatched->skb = skb; unmatched->timestamp = timestamp; unmatched->gc_cycle = mlxsw_sp->ptp_state->gc_cycle + cycles; - conflict = rhashtable_lookup_get_insert_fast(&ptp_state->unmatched_ht, - &unmatched->ht_node, - mlxsw_sp1_ptp_unmatched_ht_params); - if (conflict) + err = rhltable_insert(&ptp_state->unmatched_ht, &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); + if (err) kfree(unmatched); - return conflict; + return err; } static struct mlxsw_sp1_ptp_unmatched * mlxsw_sp1_ptp_unmatched_lookup(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp1_ptp_key key) + struct mlxsw_sp1_ptp_key key, int *p_length) { - return rhashtable_lookup(&mlxsw_sp->ptp_state->unmatched_ht, &key, - mlxsw_sp1_ptp_unmatched_ht_params); + struct mlxsw_sp1_ptp_unmatched *unmatched, *last = NULL; + struct rhlist_head *tmp, *list; + int length = 0; + + list = rhltable_lookup(&mlxsw_sp->ptp_state->unmatched_ht, &key, + mlxsw_sp1_ptp_unmatched_ht_params); + rhl_for_each_entry_rcu(unmatched, tmp, list, ht_node) { + last = unmatched; + length++; + } + + *p_length = length; + return last; } static int mlxsw_sp1_ptp_unmatched_remove(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp1_ptp_unmatched *unmatched) { - return rhashtable_remove_fast(&mlxsw_sp->ptp_state->unmatched_ht, - &unmatched->ht_node, - mlxsw_sp1_ptp_unmatched_ht_params); + return rhltable_remove(&mlxsw_sp->ptp_state->unmatched_ht, + 
&unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); } /* This function is called in the following scenarios: @@ -489,75 +499,38 @@ static void mlxsw_sp1_ptp_got_piece(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp1_ptp_key key, struct sk_buff *skb, u64 timestamp) { - struct mlxsw_sp1_ptp_unmatched *unmatched, *conflict; + struct mlxsw_sp1_ptp_unmatched *unmatched; + int length; int err; rcu_read_lock(); - unmatched = mlxsw_sp1_ptp_unmatched_lookup(mlxsw_sp, key); - spin_lock(&mlxsw_sp->ptp_state->unmatched_lock); - if (unmatched) { - /* There was an unmatched entry when we looked, but it may have - * been removed before we took the lock. - */ - err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, unmatched); - if (err) - unmatched = NULL; - } - - if (!unmatched) { - /* We have no unmatched entry, but one may have been added after - * we looked, but before we took the lock. - */ - unmatched = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key, - skb, timestamp); - if (IS_ERR(unmatched)) { - if (skb) - mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, - key.local_port, - key.ingress, NULL); - unmatched = NULL; - } else if (unmatched) { - /* Save just told us, under lock, that the entry is - * there, so this has to work. - */ - err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, - unmatched); - WARN_ON_ONCE(err); - } - } - - /* If unmatched is non-NULL here, it comes either from the lookup, or - * from the save attempt above. In either case the entry was removed - * from the hash table. If unmatched is NULL, a new unmatched entry was - * added to the hash table, and there was no conflict. - */ - + unmatched = mlxsw_sp1_ptp_unmatched_lookup(mlxsw_sp, key, &length); if (skb && unmatched && unmatched->timestamp) { unmatched->skb = skb; } else if (timestamp && unmatched && unmatched->skb) { unmatched->timestamp = timestamp; - } else if (unmatched) { - /* unmatched holds an older entry of the same type: either an - * skb if we are handling skb, or a timestamp if we are handling - * timestamp. 
We can't match that up, so save what we have. + } else { + /* Either there is no entry to match, or one that is there is + * incompatible. */ - conflict = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key, - skb, timestamp); - if (IS_ERR(conflict)) { - if (skb) - mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, - key.local_port, - key.ingress, NULL); - } else { - /* Above, we removed an object with this key from the - * hash table, under lock, so conflict can not be a - * valid pointer. - */ - WARN_ON_ONCE(conflict); - } + if (length < 100) + err = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key, + skb, timestamp); + else + err = -E2BIG; + if (err && skb) + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, + key.local_port, + key.ingress, NULL); + unmatched = NULL; + } + + if (unmatched) { + err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, unmatched); + WARN_ON_ONCE(err); } spin_unlock(&mlxsw_sp->ptp_state->unmatched_lock); @@ -669,9 +642,8 @@ mlxsw_sp1_ptp_ht_gc_collect(struct mlxsw_sp_ptp_state *ptp_state, local_bh_disable(); spin_lock(&ptp_state->unmatched_lock); - err = rhashtable_remove_fast(&ptp_state->unmatched_ht, - &unmatched->ht_node, - mlxsw_sp1_ptp_unmatched_ht_params); + err = rhltable_remove(&ptp_state->unmatched_ht, &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); spin_unlock(&ptp_state->unmatched_lock); if (err) @@ -702,7 +674,7 @@ static void mlxsw_sp1_ptp_ht_gc(struct work_struct *work) ptp_state = container_of(dwork, struct mlxsw_sp_ptp_state, ht_gc_dw); gc_cycle = ptp_state->gc_cycle++; - rhashtable_walk_enter(&ptp_state->unmatched_ht, &iter); + rhltable_walk_enter(&ptp_state->unmatched_ht, &iter); rhashtable_walk_start(&iter); while ((obj = rhashtable_walk_next(&iter))) { if (IS_ERR(obj)) @@ -855,8 +827,8 @@ struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp) spin_lock_init(&ptp_state->unmatched_lock); - err = rhashtable_init(&ptp_state->unmatched_ht, - &mlxsw_sp1_ptp_unmatched_ht_params); + err = rhltable_init(&ptp_state->unmatched_ht, + 
&mlxsw_sp1_ptp_unmatched_ht_params); if (err) goto err_hashtable_init; @@ -891,7 +863,7 @@ err_fifo_clr: err_mtptpt1_set: mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0); err_mtptpt_set: - rhashtable_destroy(&ptp_state->unmatched_ht); + rhltable_destroy(&ptp_state->unmatched_ht); err_hashtable_init: kfree(ptp_state); return ERR_PTR(err); @@ -906,8 +878,8 @@ void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state) mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, false); mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0); mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0); - rhashtable_free_and_destroy(&ptp_state->unmatched_ht, - &mlxsw_sp1_ptp_unmatched_free_fn, NULL); + rhltable_free_and_destroy(&ptp_state->unmatched_ht, + &mlxsw_sp1_ptp_unmatched_free_fn, NULL); kfree(ptp_state); } -- cgit v1.2.1 From 58799865be84e2a895dab72de0e1b996ed943f22 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 11 Aug 2019 22:18:25 +0800 Subject: net: dsa: Check existence of .port_mdb_add callback before calling it The dsa framework has optional .port_mdb_{prepare,add,del} callback fields for drivers to handle multicast database entries. When adding an entry, the framework goes through a prepare phase, then a commit phase. Drivers not providing these callbacks should be detected in the prepare phase. DSA core may still bypass the bridge layer and call the dsa_port_mdb_add function directly with no prepare phase or no switchdev trans object, and the framework ends up calling an undefined .port_mdb_add callback. This results in a NULL pointer dereference, as shown in the log below. The other functions seem to be properly guarded. Do the same for .port_mdb_add in dsa_switch_mdb_add_bitmap() as well. 
8<--- cut here --- Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = (ptrval) [00000000] *pgd=00000000 Internal error: Oops: 80000005 [#1] SMP ARM Modules linked in: rtl8xxxu rtl8192cu rtl_usb rtl8192c_common rtlwifi mac80211 cfg80211 CPU: 1 PID: 134 Comm: kworker/1:2 Not tainted 5.3.0-rc1-00247-gd3519030752a #1 Hardware name: Allwinner sun7i (A20) Family Workqueue: events switchdev_deferred_process_work PC is at 0x0 LR is at dsa_switch_event+0x570/0x620 pc : [<00000000>] lr : [] psr: 80070013 sp : ee871db8 ip : 00000000 fp : ee98d0a4 r10: 0000000c r9 : 00000008 r8 : ee89f710 r7 : ee98d040 r6 : ee98d088 r5 : c0f04c48 r4 : ee98d04c r3 : 00000000 r2 : ee89f710 r1 : 00000008 r0 : ee98d040 Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: 6deb406a DAC: 00000051 Process kworker/1:2 (pid: 134, stack limit = 0x(ptrval)) Stack: (0xee871db8 to 0xee872000) 1da0: ee871e14 103ace2d 1dc0: 00000000 ffffffff 00000000 ee871e14 00000005 00000000 c08524a0 00000000 1de0: ffffe000 c014bdfc c0f04c48 ee871e98 c0f04c48 ee9e5000 c0851120 c014bef0 1e00: 00000000 b643aea2 ee9b4068 c08509a8 ee2bf940 ee89f710 ee871ecb 00000000 1e20: 00000008 103ace2d 00000000 c087e248 ee29c868 103ace2d 00000001 ffffffff 1e40: 00000000 ee871e98 00000006 00000000 c0fb2a50 c087e2d0 ffffffff c08523c4 1e60: ffffffff c014bdfc 00000006 c0fad2d0 ee871e98 ee89f710 00000000 c014c500 1e80: 00000000 ee89f3c0 c0f04c48 00000000 ee9e5000 c087dfb4 ee9e5000 00000000 1ea0: ee89f710 ee871ecb 00000001 103ace2d 00000000 c0f04c48 00000000 c087e0a8 1ec0: 00000000 efd9a3e0 0089f3c0 103ace2d ee89f700 ee89f710 ee9e5000 00000122 1ee0: 00000100 c087e130 ee89f700 c0fad2c8 c1003ef0 c087de4c 2e928000 c0fad2ec 1f00: c0fad2ec ee839580 ef7a62c0 ef7a9400 00000000 c087def8 c0fad2ec c01447dc 1f20: ef315640 ef7a62c0 00000008 ee839580 ee839594 ef7a62c0 00000008 c0f03d00 1f40: ef7a62d8 ef7a62c0 ffffe000 c0145b84 ffffe000 c0fb2420 c0bfaa8c 00000000 1f60: ffffe000 ee84b600 ee84b5c0 
00000000 ee870000 ee839580 c0145b40 ef0e5ea4 1f80: ee84b61c c014a6f8 00000001 ee84b5c0 c014a5b0 00000000 00000000 00000000 1fa0: 00000000 00000000 00000000 c01010e8 00000000 00000000 00000000 00000000 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000 [] (dsa_switch_event) from [] (notifier_call_chain+0x48/0x84) [] (notifier_call_chain) from [] (raw_notifier_call_chain+0x18/0x20) [] (raw_notifier_call_chain) from [] (dsa_port_mdb_add+0x48/0x74) [] (dsa_port_mdb_add) from [] (__switchdev_handle_port_obj_add+0x54/0xd4) [] (__switchdev_handle_port_obj_add) from [] (switchdev_handle_port_obj_add+0x8/0x14) [] (switchdev_handle_port_obj_add) from [] (dsa_slave_switchdev_blocking_event+0x94/0xa4) [] (dsa_slave_switchdev_blocking_event) from [] (notifier_call_chain+0x48/0x84) [] (notifier_call_chain) from [] (blocking_notifier_call_chain+0x50/0x68) [] (blocking_notifier_call_chain) from [] (switchdev_port_obj_notify+0x44/0xa8) [] (switchdev_port_obj_notify) from [] (switchdev_port_obj_add_now+0x90/0x104) [] (switchdev_port_obj_add_now) from [] (switchdev_port_obj_add_deferred+0x14/0x5c) [] (switchdev_port_obj_add_deferred) from [] (switchdev_deferred_process+0x64/0x104) [] (switchdev_deferred_process) from [] (switchdev_deferred_process_work+0xc/0x14) [] (switchdev_deferred_process_work) from [] (process_one_work+0x218/0x50c) [] (process_one_work) from [] (worker_thread+0x44/0x5bc) [] (worker_thread) from [] (kthread+0x148/0x150) [] (kthread) from [] (ret_from_fork+0x14/0x2c) Exception stack(0xee871fb0 to 0xee871ff8) 1fa0: 00000000 00000000 00000000 00000000 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 Code: bad PC value ---[ end trace 1292c61abd17b130 ]--- [] (dsa_switch_event) from [] (notifier_call_chain+0x48/0x84) corresponds to $ arm-linux-gnueabihf-addr2line -C -i -e 
vmlinux c08533ec linux/net/dsa/switch.c:156 linux/net/dsa/switch.c:178 linux/net/dsa/switch.c:328 Fixes: e6db98db8a95 ("net: dsa: add switch mdb bitmap functions") Signed-off-by: Chen-Yu Tsai Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- net/dsa/switch.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 4ec5b7f85d51..09d9286b27cc 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -153,6 +153,9 @@ static void dsa_switch_mdb_add_bitmap(struct dsa_switch *ds, { int port; + if (!ds->ops->port_mdb_add) + return; + for_each_set_bit(port, bitmap, ds->num_ports) ds->ops->port_mdb_add(ds, port, mdb); } -- cgit v1.2.1 From 8874ecae2977e5a2d4f0ba301364435b81c05938 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Mon, 12 Aug 2019 08:18:25 +1200 Subject: tipc: initialise addr_trial_end when setting node addresses We set the field 'addr_trial_end' to 'jiffies', instead of the current value 0, at the moment the node address is initialized. This guarantees we don't inadvertently enter an address trial period when the node address is explicitly set by the user. Signed-off-by: Chris Packham Acked-by: Jon Maloy Signed-off-by: David S.
Miller --- net/tipc/addr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/addr.c b/net/tipc/addr.c index b88d48d00913..0f1eaed1bd1b 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -75,6 +75,7 @@ void tipc_set_node_addr(struct net *net, u32 addr) tipc_set_node_id(net, node_id); } tn->trial_addr = addr; + tn->addr_trial_end = jiffies; pr_info("32-bit node address hash set to %x\n", addr); } -- cgit v1.2.1 From 125b7e0949d4e72b15c2b1a1590f8cece985a918 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sun, 11 Aug 2019 20:13:45 -0700 Subject: net: tc35815: Explicitly check NET_IP_ALIGN is not zero in tc35815_rx clang warns: drivers/net/ethernet/toshiba/tc35815.c:1507:30: warning: use of logical '&&' with constant operand [-Wconstant-logical-operand] if (!HAVE_DMA_RXALIGN(lp) && NET_IP_ALIGN) ^ ~~~~~~~~~~~~ drivers/net/ethernet/toshiba/tc35815.c:1507:30: note: use '&' for a bitwise operation if (!HAVE_DMA_RXALIGN(lp) && NET_IP_ALIGN) ^~ & drivers/net/ethernet/toshiba/tc35815.c:1507:30: note: remove constant to silence this warning if (!HAVE_DMA_RXALIGN(lp) && NET_IP_ALIGN) ~^~~~~~~~~~~~~~~ 1 warning generated. Explicitly check that NET_IP_ALIGN is not zero, which matches how this is checked in other parts of the tree. Because NET_IP_ALIGN is a build time constant, this check will be constant folded away during optimization. Fixes: 82a9928db560 ("tc35815: Enable StripCRC feature") Link: https://github.com/ClangBuiltLinux/linux/issues/608 Signed-off-by: Nathan Chancellor Signed-off-by: David S. 
Miller --- drivers/net/ethernet/toshiba/tc35815.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index 8479a440527b..12466a72cefc 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -1504,7 +1504,7 @@ tc35815_rx(struct net_device *dev, int limit) pci_unmap_single(lp->pci_dev, lp->rx_skbs[cur_bd].skb_dma, RX_BUF_SIZE, PCI_DMA_FROMDEVICE); - if (!HAVE_DMA_RXALIGN(lp) && NET_IP_ALIGN) + if (!HAVE_DMA_RXALIGN(lp) && NET_IP_ALIGN != 0) memmove(skb->data, skb->data - NET_IP_ALIGN, pkt_len); data = skb_put(skb, pkt_len); -- cgit v1.2.1 From a9577f1921255b975da2a47d8f6733b2d6c98193 Mon Sep 17 00:00:00 2001 From: "Lowry Li (Arm Technology China)" Date: Tue, 6 Aug 2019 06:31:56 +0000 Subject: drm/komeda: Adds internal bpp computing for arm afbc only format YU08 YU10 The drm_format_info doesn't have any cpp or block_size (both are zero) information for arm only afbc format YU08/YU10. we need to compute it by ourselves. Changes since v1: 1. Removed redundant warning check in komeda_get_afbc_format_bpp(); 2. Removed a redundant empty line; 3. Rebased the branch. 
Signed-off-by: Lowry Li (Arm Technology China) Reviewed-by: James Qian Wang (Arm Technology China) Signed-off-by: james qian wang (Arm Technology China) Link: https://patchwork.freedesktop.org/patch/msgid/1565073104-24047-1-git-send-email-lowry.li@arm.com Link: https://patchwork.freedesktop.org/patch/msgid/1565073104-24047-1-git-send-email-lowry.li@arm.com --- .../gpu/drm/arm/display/komeda/komeda_format_caps.c | 19 +++++++++++++++++++ .../gpu/drm/arm/display/komeda/komeda_format_caps.h | 3 +++ .../gpu/drm/arm/display/komeda/komeda_framebuffer.c | 5 +++-- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.c b/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.c index cd4d9f53ddef..c9a1edb9a000 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.c @@ -35,6 +35,25 @@ komeda_get_format_caps(struct komeda_format_caps_table *table, return NULL; } +u32 komeda_get_afbc_format_bpp(const struct drm_format_info *info, u64 modifier) +{ + u32 bpp; + + switch (info->format) { + case DRM_FORMAT_YUV420_8BIT: + bpp = 12; + break; + case DRM_FORMAT_YUV420_10BIT: + bpp = 15; + break; + default: + bpp = info->cpp[0] * 8; + break; + } + + return bpp; +} + /* Two assumptions * 1. RGB always has YTR * 2. 
Tiled RGB always has SC diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h b/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h index 3631910d33b5..32273cf18f7c 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h +++ b/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h @@ -97,6 +97,9 @@ const struct komeda_format_caps * komeda_get_format_caps(struct komeda_format_caps_table *table, u32 fourcc, u64 modifier); +u32 komeda_get_afbc_format_bpp(const struct drm_format_info *info, + u64 modifier); + u32 *komeda_get_layer_fourcc_list(struct komeda_format_caps_table *table, u32 layer_type, u32 *n_fmts); diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c index 3b0a70ed6aa0..1b01a625f40e 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c @@ -43,7 +43,7 @@ komeda_fb_afbc_size_check(struct komeda_fb *kfb, struct drm_file *file, struct drm_framebuffer *fb = &kfb->base; const struct drm_format_info *info = fb->format; struct drm_gem_object *obj; - u32 alignment_w = 0, alignment_h = 0, alignment_header, n_blocks; + u32 alignment_w = 0, alignment_h = 0, alignment_header, n_blocks, bpp; u64 min_size; obj = drm_gem_object_lookup(file, mode_cmd->handles[0]); @@ -88,8 +88,9 @@ komeda_fb_afbc_size_check(struct komeda_fb *kfb, struct drm_file *file, kfb->offset_payload = ALIGN(n_blocks * AFBC_HEADER_SIZE, alignment_header); + bpp = komeda_get_afbc_format_bpp(info, fb->modifier); kfb->afbc_size = kfb->offset_payload + n_blocks * - ALIGN(info->cpp[0] * AFBC_SUPERBLK_PIXELS, + ALIGN(bpp * AFBC_SUPERBLK_PIXELS / 8, AFBC_SUPERBLK_ALIGNMENT); min_size = kfb->afbc_size + fb->offsets[0]; if (min_size > obj->size) { -- cgit v1.2.1 From 8f1c748b9a7751ee1297b4880788a09f7c802eb4 Mon Sep 17 00:00:00 2001 From: Mihail Atanassov Date: Mon, 5 Aug 2019 09:56:25 +0000 Subject: drm/komeda: Add support for 
'memory-region' DT node property The 'memory-region' property of the komeda display driver DT binding allows the use of a 'reserved-memory' node for buffer allocations. Add the requisite of_reserved_mem_device_{init,release} calls to actually make use of the memory if present. Changes since v1: - Move handling inside komeda_parse_dt Signed-off-by: Mihail Atanassov Reviewed-by: James Qian Wang (Arm Technology China) Signed-off-by: james qian wang (Arm Technology China) Link: https://patchwork.freedesktop.org/patch/msgid/20190805095408.21285-1-mihail.atanassov@arm.com --- drivers/gpu/drm/arm/display/komeda/komeda_dev.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c index 5a118984de33..a0eabc134dd6 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_DEBUG_FS @@ -143,6 +144,12 @@ static int komeda_parse_dt(struct device *dev, struct komeda_dev *mdev) return mdev->irq; } + /* Get the optional framebuffer memory resource */ + ret = of_reserved_mem_device_init(dev); + if (ret && ret != -ENODEV) + return ret; + ret = 0; + for_each_available_child_of_node(np, child) { if (of_node_cmp(child->name, "pipeline") == 0) { ret = komeda_parse_pipe_dt(mdev, child); @@ -289,6 +296,8 @@ void komeda_dev_destroy(struct komeda_dev *mdev) mdev->n_pipelines = 0; + of_reserved_mem_device_release(dev); + if (funcs && funcs->cleanup) funcs->cleanup(mdev); -- cgit v1.2.1 From 63daf4e166545363f3b875f5b81aecb46e1e1d19 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 9 Aug 2019 13:00:38 +0300 Subject: drm/omap: ensure we have a valid dma_mask The omapdrm driver uses dma_set_coherent_mask(), but that's not enough anymore when LPAE is enabled. 
From Christoph Hellwig : > The traditional arm DMA code ignores, but the generic dma-direct/swiotlb > has stricter checks and thus fails mappings without a DMA mask. As we > use swiotlb for arm with LPAE now, omapdrm needs to catch up and > actually set a DMA mask. Change the dma_set_coherent_mask() call to dma_coerce_mask_and_coherent() so that the dev->dma_mask is also set. Fixes: ad3c7b18c5b3 ("arm: use swiotlb for bounce buffering on LPAE configs") Reported-by: "H. Nikolaus Schaller" Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/c219e7e6-0f66-d6fd-e0cf-59c803386825@ti.com Reviewed-by: Christoph Hellwig Reviewed-by: Peter Ujfalusi --- drivers/gpu/drm/omapdrm/omap_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c index 288c59dae56a..1bad0a2cc5c6 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.c +++ b/drivers/gpu/drm/omapdrm/omap_drv.c @@ -669,7 +669,7 @@ static int pdev_probe(struct platform_device *pdev) if (omapdss_is_initialized() == false) return -EPROBE_DEFER; - ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); + ret = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (ret) { dev_err(&pdev->dev, "Failed to set the DMA mask\n"); return ret; -- cgit v1.2.1 From bb2d267c448f4bc3a3389d97c56391cb779178ae Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 12 Aug 2019 17:03:32 +0200 Subject: s390/bpf: fix lcgr instruction encoding "masking, test in bounds 3" fails on s390, because BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0) ignores the top 32 bits of BPF_REG_2. The reason is that JIT emits lcgfr instead of lcgr. The associated comment indicates that the code was intended to emit lcgr in the first place, it's just that the wrong opcode was used. Fix by using the correct opcode. 
Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend") Signed-off-by: Ilya Leoshkevich Acked-by: Vasily Gorbik Signed-off-by: Daniel Borkmann --- arch/s390/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index e636728ab452..6299156f9738 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -863,7 +863,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i break; case BPF_ALU64 | BPF_NEG: /* dst = -dst */ /* lcgr %dst,%dst */ - EMIT4(0xb9130000, dst_reg, dst_reg); + EMIT4(0xb9030000, dst_reg, dst_reg); break; /* * BPF_FROM_BE/LE -- cgit v1.2.1 From 5ee6310fb163ba7c66718905d4a19f1e71e641e0 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 10 Jul 2019 06:12:22 +0000 Subject: Bluetooth: btusb: Fix error return code in btusb_mtk_setup_firmware() Fix to return error code -EINVAL from the error handling case instead of 0, as done elsewhere in this function. 
Fixes: a1c49c434e15 ("Bluetooth: btusb: Add protocol support for MediaTek MT7668U USB devices") Signed-off-by: Wei Yongjun Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 3876fee6ad13..5cf0734eb31b 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2762,8 +2762,10 @@ static int btusb_mtk_setup_firmware(struct hci_dev *hdev, const char *fwname) fw_size = fw->size; /* The size of patch header is 30 bytes, should be skip */ - if (fw_size < 30) + if (fw_size < 30) { + err = -EINVAL; goto err_release_fw; + } fw_size -= 30; fw_ptr += 30; -- cgit v1.2.1 From 8059ba0bd0e4694e51c2ee6438a77b325f06c0d5 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Tue, 9 Jul 2019 15:44:50 -0700 Subject: Bluetooth: btqca: Add a short delay before downloading the NVM On WCN3990 downloading the NVM sometimes fails with a "TLV response size mismatch" error: [ 174.949955] Bluetooth: btqca.c:qca_download_firmware() hci0: QCA Downloading qca/crnv21.bin [ 174.958718] Bluetooth: btqca.c:qca_tlv_send_segment() hci0: QCA TLV response size mismatch It seems the controller needs a short time after downloading the firmware before it is ready for the NVM. A delay as short as 1 ms seems sufficient, make it 10 ms just in case. No event is received during the delay, hence we don't just silently drop an extra event. 
Signed-off-by: Matthias Kaehlcke Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btqca.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 8b33128dccee..c59ca5782b63 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -388,6 +388,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, return err; } + /* Give the controller some time to get ready to receive the NVM */ + msleep(10); + /* Download NVM configuration */ config.type = TLV_TYPE_NVM; if (firmware_name) -- cgit v1.2.1 From 4974c839d45e2ac89ce0e82b49d548cc12e02a9c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 9 Jul 2019 01:35:30 +0000 Subject: Bluetooth: hci_qca: Use kfree_skb() instead of kfree() Use kfree_skb() instead of kfree() to free sk_buff. Fixes: 2faa3f15fa2f ("Bluetooth: hci_qca: wcn3990: Drop baudrate change vendor event") Signed-off-by: Wei Yongjun Reviewed-by: Matthias Kaehlcke Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_qca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 82a0a3691a63..3c9fd165fda6 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -912,7 +912,7 @@ static int qca_recv_event(struct hci_dev *hdev, struct sk_buff *skb) if (hdr->evt == HCI_EV_VENDOR) complete(&qca->drop_ev_comp); - kfree(skb); + kfree_skb(skb); return 0; } -- cgit v1.2.1 From 2fde6afb8c7fce8e679c1072891cd31d54af5b83 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 8 Jul 2019 14:57:42 -0700 Subject: Bluetooth: btqca: Use correct byte format for opcode of injected command The opcode of the command injected by commit 32646db8cc28 ("Bluetooth: btqca: inject command complete event during fw download") uses the CPU byte format, however it should always be little endian. 
In practice it shouldn't really matter, since all we need is an opcode != 0, but still let's do things correctly and keep sparse happy. Fixes: 32646db8cc28 ("Bluetooth: btqca: inject command complete event during fw download") Reported-by: kbuild test robot Signed-off-by: Matthias Kaehlcke Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btqca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index c59ca5782b63..81a5c45bdcd9 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -268,7 +268,7 @@ static int qca_inject_cmd_complete_event(struct hci_dev *hdev) evt = skb_put(skb, sizeof(*evt)); evt->ncmd = 1; - evt->opcode = QCA_HCI_CC_OPCODE; + evt->opcode = cpu_to_le16(QCA_HCI_CC_OPCODE); skb_put_u8(skb, QCA_HCI_CC_SUCCESS); -- cgit v1.2.1 From a2780889e247561744dd8efbd3478a1999b72ae3 Mon Sep 17 00:00:00 2001 From: Harish Bandi Date: Fri, 12 Jul 2019 10:39:40 +0530 Subject: Bluetooth: hci_qca: Send VS pre shutdown command. WCN399x chips are coex chips, so they need a VS pre shutdown command while BT is being turned off, so that the chip can inform other active clients that BT is off.
Signed-off-by: Harish Bandi Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btqca.c | 21 +++++++++++++++++++++ drivers/bluetooth/btqca.h | 7 +++++++ drivers/bluetooth/hci_qca.c | 3 +++ 3 files changed, 31 insertions(+) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 81a5c45bdcd9..2221935fac7e 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -99,6 +99,27 @@ static int qca_send_reset(struct hci_dev *hdev) return 0; } +int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) +{ + struct sk_buff *skb; + int err; + + bt_dev_dbg(hdev, "QCA pre shutdown cmd"); + + skb = __hci_cmd_sync(hdev, QCA_PRE_SHUTDOWN_CMD, 0, + NULL, HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + bt_dev_err(hdev, "QCA preshutdown_cmd failed (%d)", err); + return err; + } + + kfree_skb(skb); + + return 0; +} +EXPORT_SYMBOL_GPL(qca_send_pre_shutdown_cmd); + static void qca_tlv_check_data(struct rome_config *config, const struct firmware *fw) { diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index 6a291a7a5d96..69c5315a65fd 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -13,6 +13,7 @@ #define EDL_PATCH_TLV_REQ_CMD (0x1E) #define EDL_NVM_ACCESS_SET_REQ_CMD (0x01) #define MAX_SIZE_PER_TLV_SEGMENT (243) +#define QCA_PRE_SHUTDOWN_CMD (0xFC08) #define EDL_CMD_REQ_RES_EVT (0x00) #define EDL_PATCH_VER_RES_EVT (0x19) @@ -135,6 +136,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, const char *firmware_name); int qca_read_soc_version(struct hci_dev *hdev, u32 *soc_version); int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr); +int qca_send_pre_shutdown_cmd(struct hci_dev *hdev); static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type) { return soc_type == QCA_WCN3990 || soc_type == QCA_WCN3998; @@ -167,4 +169,9 @@ static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type) { return false; } + +static inline int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) +{ + 
return -EOPNOTSUPP; +} #endif diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 3c9fd165fda6..0cfa5b831d39 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1386,6 +1386,9 @@ static int qca_power_off(struct hci_dev *hdev) { struct hci_uart *hu = hci_get_drvdata(hdev); + /* Perform pre shutdown command */ + qca_send_pre_shutdown_cmd(hdev); + qca_power_shutdown(hu); return 0; } -- cgit v1.2.1 From 48d9cc9d85dde37c87abb7ac9bbec6598ba44b56 Mon Sep 17 00:00:00 2001 From: Fabian Henneke Date: Mon, 15 Jul 2019 19:40:56 +0200 Subject: Bluetooth: hidp: Let hidp_send_message return number of queued bytes Let hidp_send_message return the number of successfully queued bytes instead of an unconditional 0. With the return value fixed to 0, other drivers relying on hidp, such as hidraw, can not return meaningful values from their respective implementations of write(). In particular, with the current behavior, a hidraw device's write() will have different return values depending on whether the device is connected via USB or Bluetooth, which makes it harder to abstract away the transport layer. 
Signed-off-by: Fabian Henneke Signed-off-by: Marcel Holtmann --- net/bluetooth/hidp/core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 5abd423b55fa..8d889969ae7e 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -101,6 +101,7 @@ static int hidp_send_message(struct hidp_session *session, struct socket *sock, { struct sk_buff *skb; struct sock *sk = sock->sk; + int ret; BT_DBG("session %p data %p size %d", session, data, size); @@ -114,13 +115,17 @@ static int hidp_send_message(struct hidp_session *session, struct socket *sock, } skb_put_u8(skb, hdr); - if (data && size > 0) + if (data && size > 0) { skb_put_data(skb, data, size); + ret = size; + } else { + ret = 0; + } skb_queue_tail(transmit, skb); wake_up_interruptible(sk_sleep(sk)); - return 0; + return ret; } static int hidp_send_ctrl_message(struct hidp_session *session, -- cgit v1.2.1 From c7c5ae2902bf8fe9acc75f798c0de75ac9295ccf Mon Sep 17 00:00:00 2001 From: Claire Chang Date: Tue, 6 Aug 2019 17:56:29 +0800 Subject: Bluetooth: btqca: release_firmware after qca_inject_cmd_complete_event commit 32646db8cc28 ("Bluetooth: btqca: inject command complete event during fw download") added qca_inject_cmd_complete_event() for certain qualcomm chips. However, qca_download_firmware() will return without calling release_firmware() in this case. This leads to a memory leak like the following found by kmemleak: unreferenced object 0xfffffff3868a5880 (size 128): comm "kworker/u17:5", pid 347, jiffies 4294676481 (age 312.157s) hex dump (first 32 bytes): ac fd 00 00 00 00 00 00 00 d0 7e 17 80 ff ff ff ..........~..... 00 00 00 00 00 00 00 00 00 59 8a 86 f3 ff ff ff .........Y...... 
backtrace: [<00000000978ce31d>] kmem_cache_alloc_trace+0x194/0x298 [<000000006ea0398c>] _request_firmware+0x74/0x4e4 [<000000004da31ca0>] request_firmware+0x44/0x64 [<0000000094572996>] qca_download_firmware+0x74/0x6e4 [btqca] [<00000000b24d615a>] qca_uart_setup+0xc0/0x2b0 [btqca] [<00000000364a6d5a>] qca_setup+0x204/0x570 [hci_uart] [<000000006be1a544>] hci_uart_setup+0xa8/0x148 [hci_uart] [<00000000d64c0f4f>] hci_dev_do_open+0x144/0x530 [bluetooth] [<00000000f69f5110>] hci_power_on+0x84/0x288 [bluetooth] [<00000000d4151583>] process_one_work+0x210/0x420 [<000000003cf3dcfb>] worker_thread+0x2c4/0x3e4 [<000000007ccaf055>] kthread+0x124/0x134 [<00000000bef1f723>] ret_from_fork+0x10/0x18 [<00000000c36ee3dd>] 0xffffffffffffffff unreferenced object 0xfffffff37b16de00 (size 128): comm "kworker/u17:5", pid 347, jiffies 4294676873 (age 311.766s) hex dump (first 32 bytes): da 07 00 00 00 00 00 00 00 50 ff 0b 80 ff ff ff .........P...... 00 00 00 00 00 00 00 00 00 dd 16 7b f3 ff ff ff ...........{.... backtrace: [<00000000978ce31d>] kmem_cache_alloc_trace+0x194/0x298 [<000000006ea0398c>] _request_firmware+0x74/0x4e4 [<000000004da31ca0>] request_firmware+0x44/0x64 [<0000000094572996>] qca_download_firmware+0x74/0x6e4 [btqca] [<000000000cde20a9>] qca_uart_setup+0x144/0x2b0 [btqca] [<00000000364a6d5a>] qca_setup+0x204/0x570 [hci_uart] [<000000006be1a544>] hci_uart_setup+0xa8/0x148 [hci_uart] [<00000000d64c0f4f>] hci_dev_do_open+0x144/0x530 [bluetooth] [<00000000f69f5110>] hci_power_on+0x84/0x288 [bluetooth] [<00000000d4151583>] process_one_work+0x210/0x420 [<000000003cf3dcfb>] worker_thread+0x2c4/0x3e4 [<000000007ccaf055>] kthread+0x124/0x134 [<00000000bef1f723>] ret_from_fork+0x10/0x18 [<00000000c36ee3dd>] 0xffffffffffffffff Make sure release_firmware() is called after qca_inject_cmd_complete_event() to avoid the memory leak.
Fixes: 32646db8cc28 ("Bluetooth: btqca: inject command complete event during fw download") Signed-off-by: Claire Chang Reviewed-by: Balakrishna Godavarthi Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btqca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 2221935fac7e..8f0fec5acade 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -344,7 +344,7 @@ static int qca_download_firmware(struct hci_dev *hdev, */ if (config->dnld_type == ROME_SKIP_EVT_VSE_CC || config->dnld_type == ROME_SKIP_EVT_VSE) - return qca_inject_cmd_complete_event(hdev); + ret = qca_inject_cmd_complete_event(hdev); out: release_firmware(fw); -- cgit v1.2.1 From 12072a68961af20e84ddb4aba2387ba5f70e8c14 Mon Sep 17 00:00:00 2001 From: Balakrishna Godavarthi Date: Thu, 8 Aug 2019 14:26:08 +0530 Subject: Bluetooth: btqca: Reset download type to default This patch will reset the download flag to default value before retrieving the download mode type. 
Fixes: 32646db8cc28 ("Bluetooth: btqca: inject command complete event during fw download") Signed-off-by: Balakrishna Godavarthi Tested-by: Claire Chang Reviewed-by: Claire Chang Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btqca.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 8f0fec5acade..0875470a7806 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -140,6 +140,7 @@ static void qca_tlv_check_data(struct rome_config *config, BT_DBG("Length\t\t : %d bytes", length); config->dnld_mode = ROME_SKIP_EVT_NONE; + config->dnld_type = ROME_SKIP_EVT_NONE; switch (config->type) { case TLV_TYPE_PATCH: -- cgit v1.2.1 From bb0ce4c1517d299d1a38075ecded62a5a5342c6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Fri, 9 Aug 2019 12:20:25 +0100 Subject: net: phy: at803x: stop switching phy delay config needlessly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver does a funny dance disabling and re-enabling RX and/or TX delays. In any of the RGMII-ID modes, it first disables the delays, just to re-enable them again right away. This looks like a needless exercise. Just enable the respective delays when in any of the relevant 'id' modes, and disable them otherwise. Also, remove comments which don't add anything that can't be seen by looking at the code. Signed-off-by: André Draszik CC: Andrew Lunn CC: Florian Fainelli CC: Heiner Kallweit CC: "David S. Miller" CC: netdev@vger.kernel.org Signed-off-by: David S. 
Miller --- drivers/net/phy/at803x.c | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 222ccd9ecfce..6ad8b1c63c34 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -257,36 +257,20 @@ static int at803x_config_init(struct phy_device *phydev) * after HW reset: RX delay enabled and TX delay disabled * after SW reset: RX delay enabled, while TX delay retains the * value before reset. - * - * So let's first disable the RX and TX delays in PHY and enable - * them based on the mode selected (this also takes care of RGMII - * mode where we expect delays to be disabled) */ - - ret = at803x_disable_rx_delay(phydev); - if (ret < 0) - return ret; - ret = at803x_disable_tx_delay(phydev); - if (ret < 0) - return ret; - if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || - phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) { - /* If RGMII_ID or RGMII_RXID are specified enable RX delay, - * otherwise keep it disabled - */ + phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) ret = at803x_enable_rx_delay(phydev); - if (ret < 0) - return ret; - } + else + ret = at803x_disable_rx_delay(phydev); + if (ret < 0) + return ret; if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || - phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) { - /* If RGMII_ID or RGMII_TXID are specified enable TX delay, - * otherwise keep it disabled - */ + phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) ret = at803x_enable_tx_delay(phydev); - } + else + ret = at803x_disable_tx_delay(phydev); return ret; } -- cgit v1.2.1 From 06709e81c668f5f56c65b806895b278517bd44e0 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 30 Jul 2019 21:29:52 +0800 Subject: lib: logic_pio: Fix RCU usage The traversing of io_range_list with list_for_each_entry_rcu() is not properly protected by rcu_read_lock() and rcu_read_unlock(), so add them. 
These functions mark the critical section scope where the list is protected for the reader, it cannot be "reclaimed". Any updater - in this case, the logical PIO registration functions - cannot update the list until the reader exits this critical section. In addition, the list traversing used in logic_pio_register_range() does not need to use the rcu variant. This is because we are already using io_range_mutex to guarantee mutual exclusion from mutating the list. Cc: stable@vger.kernel.org Fixes: 031e3601869c ("lib: Add generic PIO mapping method") Signed-off-by: John Garry Signed-off-by: Wei Xu --- lib/logic_pio.c | 49 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/lib/logic_pio.c b/lib/logic_pio.c index feea48fd1a0d..761296376fbc 100644 --- a/lib/logic_pio.c +++ b/lib/logic_pio.c @@ -46,7 +46,7 @@ int logic_pio_register_range(struct logic_pio_hwaddr *new_range) end = new_range->hw_start + new_range->size; mutex_lock(&io_range_mutex); - list_for_each_entry_rcu(range, &io_range_list, list) { + list_for_each_entry(range, &io_range_list, list) { if (range->fwnode == new_range->fwnode) { /* range already there */ goto end_register; @@ -108,26 +108,38 @@ end_register: */ struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode) { - struct logic_pio_hwaddr *range; + struct logic_pio_hwaddr *range, *found_range = NULL; + rcu_read_lock(); list_for_each_entry_rcu(range, &io_range_list, list) { - if (range->fwnode == fwnode) - return range; + if (range->fwnode == fwnode) { + found_range = range; + break; + } } - return NULL; + rcu_read_unlock(); + + return found_range; } /* Return a registered range given an input PIO token */ static struct logic_pio_hwaddr *find_io_range(unsigned long pio) { - struct logic_pio_hwaddr *range; + struct logic_pio_hwaddr *range, *found_range = NULL; + rcu_read_lock(); list_for_each_entry_rcu(range, &io_range_list, list) { - if (in_range(pio, range->io_start, 
range->size)) - return range; + if (in_range(pio, range->io_start, range->size)) { + found_range = range; + break; + } } - pr_err("PIO entry token %lx invalid\n", pio); - return NULL; + rcu_read_unlock(); + + if (!found_range) + pr_err("PIO entry token 0x%lx invalid\n", pio); + + return found_range; } /** @@ -180,14 +192,23 @@ unsigned long logic_pio_trans_cpuaddr(resource_size_t addr) { struct logic_pio_hwaddr *range; + rcu_read_lock(); list_for_each_entry_rcu(range, &io_range_list, list) { if (range->flags != LOGIC_PIO_CPU_MMIO) continue; - if (in_range(addr, range->hw_start, range->size)) - return addr - range->hw_start + range->io_start; + if (in_range(addr, range->hw_start, range->size)) { + unsigned long cpuaddr; + + cpuaddr = addr - range->hw_start + range->io_start; + + rcu_read_unlock(); + return cpuaddr; + } } - pr_err("addr %llx not registered in io_range_list\n", - (unsigned long long) addr); + rcu_read_unlock(); + + pr_err("addr %pa not registered in io_range_list\n", &addr); + return ~0UL; } -- cgit v1.2.1 From 0a27142bd1ee259e24a0be2b0133e5ca5df8da91 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 30 Jul 2019 21:29:53 +0800 Subject: lib: logic_pio: Avoid possible overlap for unregistering regions The code was originally written to not support unregistering logical PIO regions. To accommodate supporting unregistering logical PIO regions, subtly modify LOGIC_PIO_CPU_MMIO region registration code, such that the "end" of the registered regions is the "end" of the last region, and not the sum of the sizes of all the registered regions. 
Cc: stable@vger.kernel.org Signed-off-by: John Garry Signed-off-by: Wei Xu --- lib/logic_pio.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/logic_pio.c b/lib/logic_pio.c index 761296376fbc..d0165c88f705 100644 --- a/lib/logic_pio.c +++ b/lib/logic_pio.c @@ -35,7 +35,7 @@ int logic_pio_register_range(struct logic_pio_hwaddr *new_range) struct logic_pio_hwaddr *range; resource_size_t start; resource_size_t end; - resource_size_t mmio_sz = 0; + resource_size_t mmio_end = 0; resource_size_t iio_sz = MMIO_UPPER_LIMIT; int ret = 0; @@ -56,7 +56,7 @@ int logic_pio_register_range(struct logic_pio_hwaddr *new_range) /* for MMIO ranges we need to check for overlap */ if (start >= range->hw_start + range->size || end < range->hw_start) { - mmio_sz += range->size; + mmio_end = range->io_start + range->size; } else { ret = -EFAULT; goto end_register; @@ -69,16 +69,16 @@ int logic_pio_register_range(struct logic_pio_hwaddr *new_range) /* range not registered yet, check for available space */ if (new_range->flags == LOGIC_PIO_CPU_MMIO) { - if (mmio_sz + new_range->size - 1 > MMIO_UPPER_LIMIT) { + if (mmio_end + new_range->size - 1 > MMIO_UPPER_LIMIT) { /* if it's too big check if 64K space can be reserved */ - if (mmio_sz + SZ_64K - 1 > MMIO_UPPER_LIMIT) { + if (mmio_end + SZ_64K - 1 > MMIO_UPPER_LIMIT) { ret = -E2BIG; goto end_register; } new_range->size = SZ_64K; pr_warn("Requested IO range too big, new size set to 64K\n"); } - new_range->io_start = mmio_sz; + new_range->io_start = mmio_end; } else if (new_range->flags == LOGIC_PIO_INDIRECT) { if (iio_sz + new_range->size - 1 > IO_SPACE_LIMIT) { ret = -E2BIG; -- cgit v1.2.1 From b884e2de2afc68ce30f7093747378ef972dde253 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 30 Jul 2019 21:29:54 +0800 Subject: lib: logic_pio: Add logic_pio_unregister_range() Add a function to unregister a logical PIO range. 
Logical PIO space can still be leaked when unregistering certain LOGIC_PIO_CPU_MMIO regions, but this is acceptable for now since there are no callers to unregister LOGIC_PIO_CPU_MMIO regions, and the logical PIO region allocation scheme would need significant work to improve this. Cc: stable@vger.kernel.org Signed-off-by: John Garry Signed-off-by: Wei Xu --- include/linux/logic_pio.h | 1 + lib/logic_pio.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/include/linux/logic_pio.h b/include/linux/logic_pio.h index cbd9d8495690..88e1e6304a71 100644 --- a/include/linux/logic_pio.h +++ b/include/linux/logic_pio.h @@ -117,6 +117,7 @@ struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode); unsigned long logic_pio_trans_hwaddr(struct fwnode_handle *fwnode, resource_size_t hw_addr, resource_size_t size); int logic_pio_register_range(struct logic_pio_hwaddr *newrange); +void logic_pio_unregister_range(struct logic_pio_hwaddr *range); resource_size_t logic_pio_to_hwaddr(unsigned long pio); unsigned long logic_pio_trans_cpuaddr(resource_size_t hw_addr); diff --git a/lib/logic_pio.c b/lib/logic_pio.c index d0165c88f705..905027574e5d 100644 --- a/lib/logic_pio.c +++ b/lib/logic_pio.c @@ -98,6 +98,20 @@ end_register: return ret; } +/** + * logic_pio_unregister_range - unregister a logical PIO range for a host + * @range: pointer to the IO range which has been already registered. + * + * Unregister a previously-registered IO range node.
+ */ +void logic_pio_unregister_range(struct logic_pio_hwaddr *range) +{ + mutex_lock(&io_range_mutex); + list_del_rcu(&range->list); + mutex_unlock(&io_range_mutex); + synchronize_rcu(); +} + /** * find_io_range_by_fwnode - find logical PIO range for given FW node * @fwnode: FW node handle associated with logical PIO range -- cgit v1.2.1 From 1b15a5632a809ab57d403fd972ca68785363b654 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 30 Jul 2019 21:29:55 +0800 Subject: bus: hisi_lpc: Unregister logical PIO range to avoid potential use-after-free If, after registering a logical PIO range, the driver probe later fails, the logical PIO range memory will be released automatically. This causes an issue, in that the logical PIO range is not unregistered and the released range memory may be later referenced. Fix by unregistering the logical PIO range. And since we now unregister the logical PIO range for probe failure, avoid the special ordering of setting logical PIO range ops, which was the previous (poor) attempt at a safeguard against this. 
Cc: stable@vger.kernel.org Fixes: adf38bb0b595 ("HISI LPC: Support the LPC host on Hip06/Hip07 with DT bindings") Signed-off-by: John Garry Signed-off-by: Wei Xu --- drivers/bus/hisi_lpc.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/bus/hisi_lpc.c b/drivers/bus/hisi_lpc.c index 19d7b6ff2f17..6d301aafcad2 100644 --- a/drivers/bus/hisi_lpc.c +++ b/drivers/bus/hisi_lpc.c @@ -606,24 +606,25 @@ static int hisi_lpc_probe(struct platform_device *pdev) range->fwnode = dev->fwnode; range->flags = LOGIC_PIO_INDIRECT; range->size = PIO_INDIRECT_SIZE; + range->hostdata = lpcdev; + range->ops = &hisi_lpc_ops; + lpcdev->io_host = range; ret = logic_pio_register_range(range); if (ret) { dev_err(dev, "register IO range failed (%d)!\n", ret); return ret; } - lpcdev->io_host = range; /* register the LPC host PIO resources */ if (acpi_device) ret = hisi_lpc_acpi_probe(dev); else ret = of_platform_populate(dev->of_node, NULL, NULL, dev); - if (ret) + if (ret) { + logic_pio_unregister_range(range); return ret; - - lpcdev->io_host->hostdata = lpcdev; - lpcdev->io_host->ops = &hisi_lpc_ops; + } io_end = lpcdev->io_host->io_start + lpcdev->io_host->size; dev_info(dev, "registered range [%pa - %pa]\n", -- cgit v1.2.1 From 10e62b47973b0b0ceda076255bcb147b83e20517 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 30 Jul 2019 21:29:56 +0800 Subject: bus: hisi_lpc: Add .remove method to avoid driver unbind crash The original driver author seemed to be under the impression that a driver cannot be removed if it does not have a .remove method. Or maybe if it is a built-in platform driver. This is not true. 
This crash can be created: root@ubuntu:/sys/bus/platform/drivers/hisi-lpc# echo HISI0191\:00 > unbind root@ubuntu:/sys/bus/platform/drivers/hisi-lpc# ipmitool raw 6 1 Unable to handle kernel paging request at virtual address ffff000010035010 Mem abort info: ESR = 0x96000047 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000047 CM = 0, WnR = 1 swapper pgtable: 4k pages, 48-bit VAs, pgdp=000000000118b000 [ffff000010035010] pgd=0000041ffbfff003, pud=0000041ffbffe003, pmd=0000041ffbffd003, pte=0000000000000000 Internal error: Oops: 96000047 [#1] PREEMPT SMP Modules linked in: CPU: 17 PID: 1473 Comm: ipmitool Not tainted 5.2.0-rc5-00003-gf68c53b414a3-dirty #198 Hardware name: Huawei Taishan 2280 /D05, BIOS Hisilicon D05 IT21 Nemo 2.0 RC0 04/18/2018 pstate: 20000085 (nzCv daIf -PAN -UAO) pc : hisi_lpc_target_in+0x7c/0x120 lr : hisi_lpc_target_in+0x70/0x120 sp : ffff00001efe3930 x29: ffff00001efe3930 x28: ffff841f9f599200 x27: 0000000000000002 x26: 0000000000000000 x25: 0000000000000080 x24: 00000000000000e4 x23: 0000000000000000 x22: 0000000000000064 x21: ffff801fb667d280 x20: 0000000000000001 x19: ffff00001efe39ac x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : ffff841febe60340 x7 : ffff801fb55c52e8 x6 : 0000000000000000 x5 : 0000000000ffc0e3 x4 : 0000000000000001 x3 : ffff801fb667d280 x2 : 0000000000000001 x1 : ffff000010035010 x0 : ffff000010035000 Call trace: hisi_lpc_target_in+0x7c/0x120 hisi_lpc_comm_in+0x88/0x98 logic_inb+0x5c/0xb8 port_inb+0x18/0x20 bt_event+0x38/0x808 smi_event_handler+0x4c/0x5a0 check_start_timer_thread.part.4+0x40/0x58 sender+0x78/0x88 smi_send.isra.6+0x94/0x108 i_ipmi_request+0x2c4/0x8f8 ipmi_request_settime+0x124/0x160 handle_send_req+0x19c/0x208 ipmi_ioctl+0x2c0/0x990 do_vfs_ioctl+0xb8/0x8f8 
ksys_ioctl+0x80/0xb8 __arm64_sys_ioctl+0x1c/0x28 el0_svc_common.constprop.0+0x64/0x160 el0_svc_handler+0x28/0x78 el0_svc+0x8/0xc Code: 941d1511 aa0003f9 f94006a0 91004001 (b9000034) ---[ end trace aa842b86af7069e4 ]--- The problem here is that the host goes away but the associated logical PIO region remains registered, as do the children devices. Fix by adding a .remove method to tidy-up by removing the child devices and unregistering the logical PIO region. Cc: stable@vger.kernel.org Fixes: adf38bb0b595 ("HISI LPC: Support the LPC host on Hip06/Hip07 with DT bindings") Signed-off-by: John Garry Signed-off-by: Wei Xu --- drivers/bus/hisi_lpc.c | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/drivers/bus/hisi_lpc.c b/drivers/bus/hisi_lpc.c index 6d301aafcad2..20c957185af2 100644 --- a/drivers/bus/hisi_lpc.c +++ b/drivers/bus/hisi_lpc.c @@ -456,6 +456,17 @@ struct hisi_lpc_acpi_cell { size_t pdata_size; }; +static void hisi_lpc_acpi_remove(struct device *hostdev) +{ + struct acpi_device *adev = ACPI_COMPANION(hostdev); + struct acpi_device *child; + + device_for_each_child(hostdev, NULL, hisi_lpc_acpi_remove_subdev); + + list_for_each_entry(child, &adev->children, node) + acpi_device_clear_enumerated(child); +} + /* * hisi_lpc_acpi_probe - probe children for ACPI FW * @hostdev: LPC host device pointer @@ -555,8 +566,7 @@ static int hisi_lpc_acpi_probe(struct device *hostdev) return 0; fail: - device_for_each_child(hostdev, NULL, - hisi_lpc_acpi_remove_subdev); + hisi_lpc_acpi_remove(hostdev); return ret; } @@ -569,6 +579,10 @@ static int hisi_lpc_acpi_probe(struct device *dev) { return -ENODEV; } + +static void hisi_lpc_acpi_remove(struct device *hostdev) +{ +} #endif // CONFIG_ACPI /* @@ -626,6 +640,8 @@ static int hisi_lpc_probe(struct platform_device *pdev) return ret; } + dev_set_drvdata(dev, lpcdev); + io_end = lpcdev->io_host->io_start + lpcdev->io_host->size; dev_info(dev, "registered range [%pa - 
%pa]\n", &lpcdev->io_host->io_start, &io_end); @@ -633,6 +649,23 @@ static int hisi_lpc_probe(struct platform_device *pdev) return ret; } +static int hisi_lpc_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct acpi_device *acpi_device = ACPI_COMPANION(dev); + struct hisi_lpc_dev *lpcdev = dev_get_drvdata(dev); + struct logic_pio_hwaddr *range = lpcdev->io_host; + + if (acpi_device) + hisi_lpc_acpi_remove(dev); + else + of_platform_depopulate(dev); + + logic_pio_unregister_range(range); + + return 0; +} + static const struct of_device_id hisi_lpc_of_match[] = { { .compatible = "hisilicon,hip06-lpc", }, { .compatible = "hisilicon,hip07-lpc", }, @@ -646,5 +679,6 @@ static struct platform_driver hisi_lpc_driver = { .acpi_match_table = ACPI_PTR(hisi_lpc_acpi_match), }, .probe = hisi_lpc_probe, + .remove = hisi_lpc_remove, }; builtin_platform_driver(hisi_lpc_driver); -- cgit v1.2.1 From 7cdf6e40537f4f287c8e21b99cb4cd082a33bef0 Mon Sep 17 00:00:00 2001 From: Aaron Armstrong Skomra Date: Mon, 12 Aug 2019 11:55:52 -0700 Subject: HID: wacom: add back changes dropped in merge commit Merge commit 74acee309fb2 ("Merge branches 'for-5.2/fixes', 'for-5.3/doc', 'for-5.3/ish', 'for-5.3/logitech' and 'for-5.3/wacom' into for-linus") inadvertently dropped this change from commit 912c6aa67ad4 ("HID: wacom: Add 2nd gen Intuos Pro Small support"). 
Signed-off-by: Aaron Armstrong Skomra Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 7a8ddc999a8e..50074485b88b 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1290,7 +1290,8 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom) } if (wacom->tool[0]) { input_report_abs(pen_input, ABS_PRESSURE, get_unaligned_le16(&frame[5])); - if (wacom->features.type == INTUOSP2_BT) { + if (wacom->features.type == INTUOSP2_BT || + wacom->features.type == INTUOSP2S_BT) { input_report_abs(pen_input, ABS_DISTANCE, range ? frame[13] : wacom->features.distance_max); } else { -- cgit v1.2.1 From 45da5e09dd32fa98c32eaafe2513db6bd75e2f4f Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 23 Jul 2019 04:37:45 -0700 Subject: ARM: OMAP2+: Fix omap4 errata warning on other SoCs The errata i688 workaround produces warnings on SoCs other than omap4 and omap5: omap4_sram_init:Unable to allocate sram needed to handle errata I688 omap4_sram_init:Unable to get sram pool needed to handle errata I688 This is happening because there is no ti,omap4-mpu node, or no SRAM to configure for the other SoCs, so let's remove the warning based on the SoC revision checks. As nobody has complained, it seems that the other SoC variants do not need this workaround.
Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap4-common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c index b226c8aaf8b1..7074cfd1ff41 100644 --- a/arch/arm/mach-omap2/omap4-common.c +++ b/arch/arm/mach-omap2/omap4-common.c @@ -131,6 +131,9 @@ static int __init omap4_sram_init(void) struct device_node *np; struct gen_pool *sram_pool; + if (!soc_is_omap44xx() && !soc_is_omap54xx()) + return 0; + np = of_find_compatible_node(NULL, NULL, "ti,omap4-mpu"); if (!np) pr_warn("%s:Unable to allocate sram needed to handle errata I688\n", -- cgit v1.2.1 From 8613e2ca4fff764f23785eadfa54a08631ee682a Mon Sep 17 00:00:00 2001 From: Emmanuel Vadot Date: Wed, 24 Jul 2019 14:23:29 +0200 Subject: ARM: dts: am335x: Fix UARTs length As seen in the AM335x TRM, all the UART controllers are only 0x1000 in size. Fix this in the DTS. Signed-off-by: Emmanuel Vadot Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am33xx-l4.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/am33xx-l4.dtsi b/arch/arm/boot/dts/am33xx-l4.dtsi index ced1a19d5f89..a20b04b72be4 100644 --- a/arch/arm/boot/dts/am33xx-l4.dtsi +++ b/arch/arm/boot/dts/am33xx-l4.dtsi @@ -185,7 +185,7 @@ uart0: serial@0 { compatible = "ti,am3352-uart", "ti,omap3-uart"; clock-frequency = <48000000>; - reg = <0x0 0x2000>; + reg = <0x0 0x1000>; interrupts = <72>; status = "disabled"; dmas = <&edma 26 0>, <&edma 27 0>; @@ -934,7 +934,7 @@ uart1: serial@0 { compatible = "ti,am3352-uart", "ti,omap3-uart"; clock-frequency = <48000000>; - reg = <0x0 0x2000>; + reg = <0x0 0x1000>; interrupts = <73>; status = "disabled"; dmas = <&edma 28 0>, <&edma 29 0>; @@ -966,7 +966,7 @@ uart2: serial@0 { compatible = "ti,am3352-uart", "ti,omap3-uart"; clock-frequency = <48000000>; - reg = <0x0 0x2000>; + reg = <0x0 0x1000>; interrupts = <74>; status = "disabled"; dmas = <&edma 30 0>, <&edma 31 0>; @@ -1614,7 +1614,7 @@ 
uart3: serial@0 { compatible = "ti,am3352-uart", "ti,omap3-uart"; clock-frequency = <48000000>; - reg = <0x0 0x2000>; + reg = <0x0 0x1000>; interrupts = <44>; status = "disabled"; }; @@ -1644,7 +1644,7 @@ uart4: serial@0 { compatible = "ti,am3352-uart", "ti,omap3-uart"; clock-frequency = <48000000>; - reg = <0x0 0x2000>; + reg = <0x0 0x1000>; interrupts = <45>; status = "disabled"; }; @@ -1674,7 +1674,7 @@ uart5: serial@0 { compatible = "ti,am3352-uart", "ti,omap3-uart"; clock-frequency = <48000000>; - reg = <0x0 0x2000>; + reg = <0x0 0x1000>; interrupts = <46>; status = "disabled"; }; -- cgit v1.2.1 From 07f9a8be66a9bd86f9eaedf8f8aeb416195adab8 Mon Sep 17 00:00:00 2001 From: Faiz Abbas Date: Wed, 7 Aug 2019 16:22:38 +0530 Subject: ARM: dts: dra74x: Fix iodelay configuration for mmc3 According to the latest am572x[1] and dra74x[2] data manuals, mmc3 default, hs, sdr12 and sdr25 modes use iodelay values given in MMC3_MANUAL1. Set the MODE_SELECT bit for these so that manual mode is selected and correct iodelay values can be configured. 
[1] http://www.ti.com/lit/ds/symlink/am5728.pdf [2] http://www.ti.com/lit/ds/symlink/dra746.pdf Signed-off-by: Faiz Abbas Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dra74x-mmc-iodelay.dtsi | 50 +++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/arch/arm/boot/dts/dra74x-mmc-iodelay.dtsi b/arch/arm/boot/dts/dra74x-mmc-iodelay.dtsi index 28ebb4eb884a..214b9e6de2c3 100644 --- a/arch/arm/boot/dts/dra74x-mmc-iodelay.dtsi +++ b/arch/arm/boot/dts/dra74x-mmc-iodelay.dtsi @@ -32,7 +32,7 @@ * * Datamanual Revisions: * - * AM572x Silicon Revision 2.0: SPRS953B, Revised November 2016 + * AM572x Silicon Revision 2.0: SPRS953F, Revised May 2019 * AM572x Silicon Revision 1.1: SPRS915R, Revised November 2016 * */ @@ -229,45 +229,45 @@ mmc3_pins_default: mmc3_pins_default { pinctrl-single,pins = < - DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_clk.mmc3_clk */ - DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */ - DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */ - DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */ - DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */ - DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */ + DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_clk.mmc3_clk */ + DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */ + DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */ + DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */ + DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */ + DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */ >; }; mmc3_pins_hs: mmc3_pins_hs { 
pinctrl-single,pins = < - DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_clk.mmc3_clk */ - DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */ - DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */ - DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */ - DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */ - DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */ + DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_clk.mmc3_clk */ + DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */ + DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */ + DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */ + DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */ + DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */ >; }; mmc3_pins_sdr12: mmc3_pins_sdr12 { pinctrl-single,pins = < - DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_clk.mmc3_clk */ - DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */ - DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */ - DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */ - DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */ - DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */ + DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_clk.mmc3_clk */ + DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */ + DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* 
mmc3_dat0.mmc3_dat0 */ + DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */ + DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */ + DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */ >; }; mmc3_pins_sdr25: mmc3_pins_sdr25 { pinctrl-single,pins = < - DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_clk.mmc3_clk */ - DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */ - DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */ - DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */ - DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */ - DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */ + DRA7XX_CORE_IOPAD(0x377c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_clk.mmc3_clk */ + DRA7XX_CORE_IOPAD(0x3780, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_cmd.mmc3_cmd */ + DRA7XX_CORE_IOPAD(0x3784, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat0.mmc3_dat0 */ + DRA7XX_CORE_IOPAD(0x3788, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat1.mmc3_dat1 */ + DRA7XX_CORE_IOPAD(0x378c, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat2.mmc3_dat2 */ + DRA7XX_CORE_IOPAD(0x3790, (PIN_INPUT_PULLUP | MODE_SELECT | MUX_MODE0)) /* mmc3_dat3.mmc3_dat3 */ >; }; -- cgit v1.2.1 From fa8397e45c64e60c80373bc19ee56e42a6bed9b6 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Sun, 11 Aug 2019 10:48:02 +0200 Subject: ARM: OMAP1: ams-delta-fiq: Fix missing irq_ack Non-serio path of Amstrad Delta FIQ deferred handler depended on irq_ack() method provided by OMAP GPIO driver. That method has been removed by commit 693de831c6e5 ("gpio: omap: remove irq_ack method"). 
Remove useless code from the deferred handler and reimplement the missing operation inside the base FIQ handler. Should another dependency - irq_unmask() - be ever removed from the OMAP GPIO driver, WARN once if missing. Signed-off-by: Janusz Krzysztofik Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/ams-delta-fiq-handler.S | 3 ++- arch/arm/mach-omap1/ams-delta-fiq.c | 4 +--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-omap1/ams-delta-fiq-handler.S b/arch/arm/mach-omap1/ams-delta-fiq-handler.S index 7c9fb7fe0070..938d872b6142 100644 --- a/arch/arm/mach-omap1/ams-delta-fiq-handler.S +++ b/arch/arm/mach-omap1/ams-delta-fiq-handler.S @@ -129,6 +129,8 @@ restart: orr r11, r11, r13 @ mask all requested interrupts str r11, [r12, #OMAP1510_GPIO_INT_MASK] + str r13, [r12, #OMAP1510_GPIO_INT_STATUS] @ ack all requested interrupts + ands r10, r13, #KEYBRD_CLK_MASK @ extract keyboard status - set? beq hksw @ no - try next source @@ -136,7 +138,6 @@ restart: @@@@@@@@@@@@@@@@@@@@@@ @ Keyboard clock FIQ mode interrupt handler @ r10 now contains KEYBRD_CLK_MASK, use it - str r10, [r12, #OMAP1510_GPIO_INT_STATUS] @ ack the interrupt bic r11, r11, r10 @ unmask it str r11, [r12, #OMAP1510_GPIO_INT_MASK] diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c index 51212133ce06..dedaf715f47c 100644 --- a/arch/arm/mach-omap1/ams-delta-fiq.c +++ b/arch/arm/mach-omap1/ams-delta-fiq.c @@ -72,9 +72,7 @@ static irqreturn_t deferred_fiq(int irq, void *dev_id) * interrupts default to since commit 80ac93c27441 * requires interrupt already acked and unmasked. 
*/ - if (irq_chip->irq_ack) - irq_chip->irq_ack(d); - if (irq_chip->irq_unmask) + if (!WARN_ON_ONCE(!irq_chip->irq_unmask)) irq_chip->irq_unmask(d); } for (; irq_counter[gpio] < fiq_count; irq_counter[gpio]++) -- cgit v1.2.1 From a304f483b6b00d42bde41c45ca52c670945348e2 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Tue, 25 Jun 2019 18:33:15 -0500 Subject: bus: ti-sysc: Simplify cleanup upon failures in sysc_probe() The clocks are not yet parsed and prepared until after a successful sysc_get_clocks(), so there is no need to unprepare the clocks upon any failure of any of the prior functions in sysc_probe(). The current code path would have been a no-op because of the clock validity checks within sysc_unprepare(), but let's just simplify the cleanup path by returning the error directly. While at this, also fix the cleanup path for a sysc_init_resets() failure which is executed after the clocks are prepared. Signed-off-by: Suman Anna Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index a319e1a748fe..2db474ab4c6b 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -2383,27 +2383,27 @@ static int sysc_probe(struct platform_device *pdev) error = sysc_init_dts_quirks(ddata); if (error) - goto unprepare; + return error; error = sysc_map_and_check_registers(ddata); if (error) - goto unprepare; + return error; error = sysc_init_sysc_mask(ddata); if (error) - goto unprepare; + return error; error = sysc_init_idlemodes(ddata); if (error) - goto unprepare; + return error; error = sysc_init_syss_mask(ddata); if (error) - goto unprepare; + return error; error = sysc_init_pdata(ddata); if (error) - goto unprepare; + return error; sysc_init_early_quirks(ddata); @@ -2413,7 +2413,7 @@ static int sysc_probe(struct platform_device *pdev) error = sysc_init_resets(ddata); if (error) - return error; + goto unprepare; error = 
sysc_init_module(ddata); if (error) -- cgit v1.2.1 From 5b63fb90adb95a178ad403e1703f59bf1ff2c16b Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 23 Jul 2019 00:29:23 -0700 Subject: ARM: dts: Fix incomplete dts data for am3 and am4 mmc Commit 4e27f752ab8c ("ARM: OMAP2+: Drop mmc platform data for am330x and am43xx") dropped legacy mmc platform data for am3 and am4, but missed the fact that we never updated the dts files for mmc3 that is directly on l3 interconnect instead of l4 interconnect. This leads to a situation with no legacy platform data and incomplete dts data. Let's update the mmc instances on l3 interconnect to probe properly with ti-sysc interconnect target module driver to make mmc3 work again. Let's still keep legacy "ti,hwmods" property around for v5.2 kernel and only drop it later on. Note that there is no need to use property status = "disabled" for mmc3. The default for dts is enabled, and runtime PM will idle unused instances just fine. Fixes: 4e27f752ab8c ("ARM: OMAP2+: Drop mmc platform data for am330x and am43xx") Reported-by: David Lechner Tested-by: David Lechner Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am33xx.dtsi | 32 ++++++++++++++++++++++++++------ arch/arm/boot/dts/am4372.dtsi | 32 ++++++++++++++++++++++++++------ 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index e5c2f71a7c77..fb6b8aa12cc5 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -234,13 +234,33 @@ interrupt-names = "edma3_tcerrint"; }; - mmc3: mmc@47810000 { - compatible = "ti,omap4-hsmmc"; + target-module@47810000 { + compatible = "ti,sysc-omap2", "ti,sysc"; ti,hwmods = "mmc3"; - ti,needs-special-reset; - interrupts = <29>; - reg = <0x47810000 0x1000>; - status = "disabled"; + reg = <0x478102fc 0x4>, + <0x47810110 0x4>, + <0x47810114 0x4>; + reg-names = "rev", "sysc", "syss"; + ti,sysc-mask = <(SYSC_OMAP2_CLOCKACTIVITY | + SYSC_OMAP2_ENAWAKEUP | + 
SYSC_OMAP2_SOFTRESET | + SYSC_OMAP2_AUTOIDLE)>; + ti,sysc-sidle = , + , + ; + ti,syss-mask = <1>; + clocks = <&l3s_clkctrl AM3_L3S_MMC3_CLKCTRL 0>; + clock-names = "fck"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x47810000 0x1000>; + + mmc3: mmc@0 { + compatible = "ti,omap4-hsmmc"; + ti,needs-special-reset; + interrupts = <29>; + reg = <0x0 0x1000>; + }; }; usb: usb@47400000 { diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index 55aff4db9c7c..848e2a8884e2 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -228,13 +228,33 @@ interrupt-names = "edma3_tcerrint"; }; - mmc3: mmc@47810000 { - compatible = "ti,omap4-hsmmc"; - reg = <0x47810000 0x1000>; + target-module@47810000 { + compatible = "ti,sysc-omap2", "ti,sysc"; ti,hwmods = "mmc3"; - ti,needs-special-reset; - interrupts = ; - status = "disabled"; + reg = <0x478102fc 0x4>, + <0x47810110 0x4>, + <0x47810114 0x4>; + reg-names = "rev", "sysc", "syss"; + ti,sysc-mask = <(SYSC_OMAP2_CLOCKACTIVITY | + SYSC_OMAP2_ENAWAKEUP | + SYSC_OMAP2_SOFTRESET | + SYSC_OMAP2_AUTOIDLE)>; + ti,sysc-sidle = , + , + ; + ti,syss-mask = <1>; + clocks = <&l3s_clkctrl AM4_L3S_MMC3_CLKCTRL 0>; + clock-names = "fck"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x47810000 0x1000>; + + mmc3: mmc@0 { + compatible = "ti,omap4-hsmmc"; + ti,needs-special-reset; + interrupts = ; + reg = <0x0 0x1000>; + }; }; sham: sham@53100000 { -- cgit v1.2.1 From 0c0d1ec21b2f6070e22b54b2de874f1af960ced8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sun, 28 Jul 2019 18:22:40 -0500 Subject: ARM: OMAP: dma: Mark expected switch fall-throughs Mark switch cases where we are expecting to fall through. 
This patch fixes the following warnings: arch/arm/plat-omap/dma.c: In function 'omap_set_dma_src_burst_mode': arch/arm/plat-omap/dma.c:384:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (dma_omap2plus()) { ^ arch/arm/plat-omap/dma.c:393:2: note: here case OMAP_DMA_DATA_BURST_16: ^~~~ arch/arm/plat-omap/dma.c:394:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (dma_omap2plus()) { ^ arch/arm/plat-omap/dma.c:402:2: note: here default: ^~~~~~~ arch/arm/plat-omap/dma.c: In function 'omap_set_dma_dest_burst_mode': arch/arm/plat-omap/dma.c:473:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (dma_omap2plus()) { ^ arch/arm/plat-omap/dma.c:481:2: note: here default: ^~~~~~~ Notice that, in this particular case, the code comment is modified in accordance with what GCC is expecting to find. Reported-by: Stephen Rothwell Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/dma.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c index 79f43acf9acb..08c99413d02c 100644 --- a/arch/arm/plat-omap/dma.c +++ b/arch/arm/plat-omap/dma.c @@ -388,17 +388,15 @@ void omap_set_dma_src_burst_mode(int lch, enum omap_dma_burst_mode burst_mode) /* * not supported by current hardware on OMAP1 * w |= (0x03 << 7); - * fall through */ + /* fall through */ case OMAP_DMA_DATA_BURST_16: if (dma_omap2plus()) { burst = 0x3; break; } - /* - * OMAP1 don't support burst 16 - * fall through - */ + /* OMAP1 don't support burst 16 */ + /* fall through */ default: BUG(); } @@ -474,10 +472,8 @@ void omap_set_dma_dest_burst_mode(int lch, enum omap_dma_burst_mode burst_mode) burst = 0x3; break; } - /* - * OMAP1 don't support burst 16 - * fall through - */ + /* OMAP1 don't support burst 16 */ + /* fall through */ default: printk(KERN_ERR "Invalid DMA burst mode\n"); BUG(); -- cgit v1.2.1 From 
d8e0cecbcaf09f38661bfb7da4c1e62297308672 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Wed, 26 Jun 2019 13:20:14 +0530 Subject: soc: ti: pm33xx: Fix static checker warnings The patch fixes a bunch of static checker warnings. Reported-by: Dan Carpenter Signed-off-by: Keerthy Signed-off-by: Tony Lindgren --- drivers/soc/ti/pm33xx.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/soc/ti/pm33xx.c b/drivers/soc/ti/pm33xx.c index bb77c220b6f8..5f3a4499cf40 100644 --- a/drivers/soc/ti/pm33xx.c +++ b/drivers/soc/ti/pm33xx.c @@ -252,7 +252,7 @@ static int am33xx_pm_begin(suspend_state_t state) if (state == PM_SUSPEND_MEM && pm_ops->check_off_mode_enable()) { nvmem = devm_nvmem_device_get(&omap_rtc->dev, "omap_rtc_scratch0"); - if (nvmem) + if (!IS_ERR(nvmem)) nvmem_device_write(nvmem, RTC_SCRATCH_MAGIC_REG * 4, 4, (void *)&rtc_magic_val); rtc_only_idle = 1; @@ -278,9 +278,12 @@ static void am33xx_pm_end(void) struct nvmem_device *nvmem; nvmem = devm_nvmem_device_get(&omap_rtc->dev, "omap_rtc_scratch0"); + if (IS_ERR(nvmem)) + return; + m3_ipc->ops->finish_low_power(m3_ipc); if (rtc_only_idle) { - if (retrigger_irq) + if (retrigger_irq) { /* * 32 bits of Interrupt Set-Pending correspond to 32 * 32 interrupts. 
Compute the bit offset of the @@ -291,8 +294,10 @@ static void am33xx_pm_end(void) writel_relaxed(1 << (retrigger_irq & 31), gic_dist_base + GIC_INT_SET_PENDING_BASE + retrigger_irq / 32 * 4); - nvmem_device_write(nvmem, RTC_SCRATCH_MAGIC_REG * 4, 4, - (void *)&val); + } + + nvmem_device_write(nvmem, RTC_SCRATCH_MAGIC_REG * 4, 4, + (void *)&val); } rtc_only_idle = 0; @@ -415,7 +420,7 @@ static int am33xx_pm_rtc_setup(void) nvmem = devm_nvmem_device_get(&omap_rtc->dev, "omap_rtc_scratch0"); - if (nvmem) { + if (!IS_ERR(nvmem)) { nvmem_device_read(nvmem, RTC_SCRATCH_MAGIC_REG * 4, 4, (void *)&rtc_magic_val); if ((rtc_magic_val & 0xffff) != RTC_REG_BOOT_MAGIC) -- cgit v1.2.1 From 4a65bbb9109ed7edd4b6ed7168ced48abb8561a2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 13 Apr 2019 22:12:43 +0800 Subject: soc: ti: pm33xx: Make two symbols static Fix sparse warnings: drivers/soc/ti/pm33xx.c:144:27: warning: symbol 'rtc_wake_src' was not declared. Should it be static? drivers/soc/ti/pm33xx.c:160:5: warning: symbol 'am33xx_rtc_only_idle' was not declared. Should it be static? 
Signed-off-by: YueHaibing Signed-off-by: Tony Lindgren --- drivers/soc/ti/pm33xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soc/ti/pm33xx.c b/drivers/soc/ti/pm33xx.c index 5f3a4499cf40..ccc6d53fe788 100644 --- a/drivers/soc/ti/pm33xx.c +++ b/drivers/soc/ti/pm33xx.c @@ -141,7 +141,7 @@ static int __init am43xx_map_gic(void) } #ifdef CONFIG_SUSPEND -struct wkup_m3_wakeup_src rtc_wake_src(void) +static struct wkup_m3_wakeup_src rtc_wake_src(void) { u32 i; @@ -157,7 +157,7 @@ struct wkup_m3_wakeup_src rtc_wake_src(void) return rtc_ext_wakeup; } -int am33xx_rtc_only_idle(unsigned long wfi_flags) +static int am33xx_rtc_only_idle(unsigned long wfi_flags) { omap_rtc_power_off_program(&omap_rtc->dev); am33xx_do_wfi_sram(wfi_flags); -- cgit v1.2.1 From 91b4db5313a2c793aabc2143efb8ed0cf0fdd097 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 12 Aug 2019 18:18:07 +0200 Subject: s390/bpf: use 32-bit index for tail calls "p runtime/jit: pass > 32bit index to tail_call" fails when bpf_jit_enable=1, because the tail call is not executed. This in turn is because the generated code assumes index is 64-bit, while it must be 32-bit, and as a result prog array bounds check fails, while it should pass. Even if bounds check would have passed, the code that follows uses 64-bit index to compute prog array offset. Fix by using clrj instead of clgrj for comparing index with array size, and also by using llgfr for truncating index to 32 bits before using it to compute prog array offset. 
Fixes: 6651ee070b31 ("s390/bpf: implement bpf_tail_call() helper") Reported-by: Yauheni Kaliuta Acked-by: Vasily Gorbik Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann --- arch/s390/net/bpf_jit_comp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 6299156f9738..955eb355c2fd 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1049,8 +1049,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i /* llgf %w1,map.max_entries(%b2) */ EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2, offsetof(struct bpf_array, map.max_entries)); - /* clgrj %b3,%w1,0xa,label0: if %b3 >= %w1 goto out */ - EMIT6_PCREL_LABEL(0xec000000, 0x0065, BPF_REG_3, + /* clrj %b3,%w1,0xa,label0: if (u32)%b3 >= (u32)%w1 goto out */ + EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3, REG_W1, 0, 0xa); /* @@ -1076,8 +1076,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i * goto out; */ - /* sllg %r1,%b3,3: %r1 = index * 8 */ - EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, BPF_REG_3, REG_0, 3); + /* llgfr %r1,%b3: %r1 = (u32) index */ + EMIT4(0xb9160000, REG_1, BPF_REG_3); + /* sllg %r1,%r1,3: %r1 *= 8 */ + EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, REG_1, REG_0, 3); /* lg %r1,prog(%b2,%r1) */ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, BPF_REG_2, REG_1, offsetof(struct bpf_array, ptrs)); -- cgit v1.2.1 From addf3382c47c033e579c9c88f18e36c4e75d806a Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 13 Aug 2019 15:38:06 +0200 Subject: Revert "HID: logitech-hidpp: add USB PID for a few more supported mice" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This partially reverts commit 27fc32fd9417968a459d43d9a7c50fd423d53eb9. It turns out that the G502 has some issues with hid-logitech-hidpp: when plugging it in, the driver tries to contact it but it fails. 
So the driver bails out leaving only the mouse event node available. This timeout is problematic as it introduces a delay in the boot, and having only the mouse event node means that the hardware macro keys cannot be relayed to userspace. Filipe and I just gave a shot at the following devices: G403 Wireless (0xC082) G703 (0xC087) G703 Hero (0xC090) G903 (0xC086) G903 Hero (0xC091) G Pro (0xC088) Revert the devices that we are not sure work flawlessly. Reviewed-by: Filipe Laíns Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-logitech-hidpp.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 21268c9fa71a..343052b117a9 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -3749,30 +3749,10 @@ static const struct hid_device_id hidpp_devices[] = { { L27MHZ_DEVICE(HID_ANY_ID) }, - { /* Logitech G203/Prodigy Gaming Mouse */ - HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC084) }, - { /* Logitech G302 Gaming Mouse */ - HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC07F) }, - { /* Logitech G303 Gaming Mouse */ - HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC080) }, - { /* Logitech G400 Gaming Mouse */ - HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC07E) }, { /* Logitech G403 Wireless Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC082) }, - { /* Logitech G403 Gaming Mouse */ - HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC083) }, - { /* Logitech G403 Hero Gaming Mouse over USB */ - HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC08F) }, - { /* Logitech G502 Proteus Core Gaming Mouse */ - HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC07D) }, - { /* Logitech G502 Proteus Spectrum Gaming Mouse over USB */ - HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC332) }, - { /* Logitech G502 Hero Gaming Mouse over USB */ - HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC08B) }, { /* Logitech G700 Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC06B) 
}, - { /* Logitech G700s Gaming Mouse over USB */ - HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC07C) }, { /* Logitech G703 Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC087) }, { /* Logitech G703 Hero Gaming Mouse over USB */ -- cgit v1.2.1 From a3384b8d9f63cc042711293bb97bdc92dca0391d Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 13 Aug 2019 15:38:07 +0200 Subject: HID: logitech-hidpp: remove support for the G700 over USB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The G700 suffers from the same issue as the G502: when plugging it in, the driver tries to contact it but it fails. This timeout is problematic as it introduces a delay in the boot, and having only the mouse event node means that the hardware macro keys cannot be relayed to the userspace. Link: https://github.com/libratbag/libratbag/issues/797 Fixes: 91cf9a98ae41 ("HID: logitech-hidpp: make .probe usbhid capable") Cc: stable@vger.kernel.org # v5.2 Reviewed-by: Filipe Laíns Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-logitech-hidpp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 343052b117a9..0179f7ed77e5 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -3751,8 +3751,6 @@ static const struct hid_device_id hidpp_devices[] = { { /* Logitech G403 Wireless Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC082) }, - { /* Logitech G700 Gaming Mouse over USB */ - HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC06B) }, { /* Logitech G703 Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC087) }, { /* Logitech G703 Hero Gaming Mouse over USB */ -- cgit v1.2.1 From 656c8e9cc1badbc18eefe6ba01d33ebbcae61b9a Mon Sep 17 00:00:00 2001 From: Dirk Morris Date: Thu, 8 Aug 2019 13:57:51 -0700 Subject: netfilter: conntrack: Use consistent ct id hash calculation Change ct id hash calculation 
to only use invariants. Currently the ct id hash calculation is based on some fields that can change in the lifetime of a conntrack entry in some corner cases. The current hash uses the whole tuple which contains an hlist pointer which will change when the conntrack is placed on the dying list resulting in a ct id change. This patch also removes the reply-side tuple and extension pointer from the hash calculation so that the ct id will not change from initialization until confirmation. Fixes: 3c79107631db1f7 ("netfilter: ctnetlink: don't use conntrack/expect object addresses as id") Signed-off-by: Dirk Morris Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index a542761e90d1..81a8ef42b88d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -453,13 +453,12 @@ EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); * table location, we assume id gets exposed to userspace. * * Following nf_conn items do not change throughout lifetime - * of the nf_conn after it has been committed to main hash table: + * of the nf_conn: * * 1. nf_conn address - * 2. nf_conn->ext address - * 3. nf_conn->master address (normally NULL) - * 4. tuple - * 5. the associated net namespace + * 2. nf_conn->master address (normally NULL) + * 3. the associated net namespace + * 4. 
the original direction tuple */ u32 nf_ct_get_id(const struct nf_conn *ct) { @@ -469,9 +468,10 @@ u32 nf_ct_get_id(const struct nf_conn *ct) net_get_random_once(&ct_id_seed, sizeof(ct_id_seed)); a = (unsigned long)ct; - b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct)); - c = (unsigned long)ct->ext; - d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash), + b = (unsigned long)ct->master; + c = (unsigned long)nf_ct_net(ct); + d = (unsigned long)siphash(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + sizeof(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple), &ct_id_seed); #ifdef CONFIG_64BIT return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed); -- cgit v1.2.1 From 074014abdf2bd2a00da3dd14a6ae04cafc1d62cc Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 13 Aug 2019 15:28:18 +0100 Subject: net: ieee802154: remove redundant assignment to rc Variable rc is initialized to a value that is never read and it is re-assigned later. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Stefan Schmidt --- net/ieee802154/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index bc6b912603f1..1e9876813392 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -1102,7 +1102,7 @@ static struct packet_type ieee802154_packet_type = { static int __init af_ieee802154_init(void) { - int rc = -EINVAL; + int rc; rc = proto_register(&ieee802154_raw_prot, 1); if (rc) -- cgit v1.2.1 From a1794de8b92ea6bc2037f445b296814ac826693e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 12 Aug 2019 20:49:12 +0800 Subject: sctp: fix the transport error_count check As the annotation says in sctp_do_8_2_transport_strike(): "If the transport error count is greater than the pf_retrans threshold, and less than pathmaxrtx ..." It should be transport->error_count checked with pathmaxrxt, instead of asoc->pf_retrans. 
Fixes: 5aa93bcf66f4 ("sctp: Implement quick failover draft from tsvwg") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_sideeffect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index a554d6d15d1b..1cf5bb5b73c4 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -546,7 +546,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_cmd_seq *commands, */ if (net->sctp.pf_enable && (transport->state == SCTP_ACTIVE) && - (asoc->pf_retrans < transport->pathmaxrxt) && + (transport->error_count < transport->pathmaxrxt) && (transport->error_count > asoc->pf_retrans)) { sctp_assoc_control_transport(asoc, transport, -- cgit v1.2.1 From 072f79400032f74917726cf76f4248367ea2b5b8 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 12 Aug 2019 16:44:35 +0200 Subject: s390/qeth: serialize cmd reply with concurrent timeout Callbacks for a cmd reply run outside the protection of card->lock, to allow for additional cmds to be issued & enqueued in parallel. When qeth_send_control_data() bails out for a cmd without having received a reply (eg. due to timeout), its callback may concurrently be processing a reply that just arrived. In this case, the callback potentially accesses a stale reply->reply_param area that eg. was on-stack and has already been released. To avoid this race, add some locking so that qeth_send_control_data() can (1) wait for a concurrently running callback, and (2) zap any pending callback that still wants to run. 
Signed-off-by: Julian Wiedmann Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_core.h | 1 + drivers/s390/net/qeth_core_main.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index c7ee07ce3615..28db887d38ed 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -629,6 +629,7 @@ struct qeth_seqno { struct qeth_reply { struct list_head list; struct completion received; + spinlock_t lock; int (*callback)(struct qeth_card *, struct qeth_reply *, unsigned long); u32 seqno; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 4d0caeebc802..9c3310c4d61d 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -544,6 +544,7 @@ static struct qeth_reply *qeth_alloc_reply(struct qeth_card *card) if (reply) { refcount_set(&reply->refcnt, 1); init_completion(&reply->received); + spin_lock_init(&reply->lock); } return reply; } @@ -799,6 +800,13 @@ static void qeth_issue_next_read_cb(struct qeth_card *card, if (!reply->callback) { rc = 0; + goto no_callback; + } + + spin_lock_irqsave(&reply->lock, flags); + if (reply->rc) { + /* Bail out when the requestor has already left: */ + rc = reply->rc; } else { if (cmd) { reply->offset = (u16)((char *)cmd - (char *)iob->data); @@ -807,7 +815,9 @@ static void qeth_issue_next_read_cb(struct qeth_card *card, rc = reply->callback(card, reply, (unsigned long)iob); } } + spin_unlock_irqrestore(&reply->lock, flags); +no_callback: if (rc <= 0) qeth_notify_reply(reply, rc); qeth_put_reply(reply); @@ -1749,6 +1759,16 @@ static int qeth_send_control_data(struct qeth_card *card, rc = (timeout == -ERESTARTSYS) ? 
-EINTR : -ETIME; qeth_dequeue_reply(card, reply); + + if (reply_cb) { + /* Wait until the callback for a late reply has completed: */ + spin_lock_irq(&reply->lock); + if (rc) + /* Zap any callback that's still pending: */ + reply->rc = rc; + spin_unlock_irq(&reply->lock); + } + if (!rc) rc = reply->rc; qeth_put_reply(reply); -- cgit v1.2.1 From 66cf4710b23ab2adda11155684a2c8826f4fe732 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Mon, 12 Aug 2019 16:13:06 -0500 Subject: ibmveth: Convert multicast list size for little-endian system The ibm,mac-address-filters property defines the maximum number of addresses the hypervisor's multicast filter list can support. It is encoded as a big-endian integer in the OF device tree, but the virtual ethernet driver does not convert it for use by little-endian systems. As a result, the driver is not behaving as it should on affected systems when a large number of multicast addresses are assigned to the device. Reported-by: Hangbin Liu Signed-off-by: Thomas Falcon Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmveth.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index d654c234aaf7..c5be4ebd8437 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1605,7 +1605,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) struct net_device *netdev; struct ibmveth_adapter *adapter; unsigned char *mac_addr_p; - unsigned int *mcastFilterSize_p; + __be32 *mcastFilterSize_p; long ret; unsigned long ret_attr; @@ -1627,8 +1627,9 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) return -EINVAL; } - mcastFilterSize_p = (unsigned int *)vio_get_attribute(dev, - VETH_MCAST_FILTER_SIZE, NULL); + mcastFilterSize_p = (__be32 *)vio_get_attribute(dev, + VETH_MCAST_FILTER_SIZE, + NULL); if (!mcastFilterSize_p) { dev_err(&dev->dev, "Can't find 
VETH_MCAST_FILTER_SIZE " "attribute\n"); @@ -1645,7 +1646,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) adapter->vdev = dev; adapter->netdev = netdev; - adapter->mcastFilterSize = *mcastFilterSize_p; + adapter->mcastFilterSize = be32_to_cpu(*mcastFilterSize_p); adapter->pool_config = 0; netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16); -- cgit v1.2.1 From 48ec7014c56e5eb2fbf6f479896143622d834f3b Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Mon, 12 Aug 2019 14:11:35 -0500 Subject: net/mlx4_en: fix a memory leak bug In mlx4_en_config_rss_steer(), 'rss_map->indir_qp' is allocated through kzalloc(). After that, mlx4_qp_alloc() is invoked to configure RSS indirection. However, if mlx4_qp_alloc() fails, the allocated 'rss_map->indir_qp' is not deallocated, leading to a memory leak bug. To fix the above issue, add the 'qp_alloc_err' label to free 'rss_map->indir_qp'. Fixes: 4931c6ef04b4 ("net/mlx4_en: Optimized single ring steering") Signed-off-by: Wenwen Wang Reviewed-by: Tariq Toukan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 6c01314e87b0..db3552f2d087 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1187,7 +1187,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, rss_map->indir_qp); if (err) { en_err(priv, "Failed to allocate RSS indirection QP\n"); - goto rss_err; + goto qp_alloc_err; } rss_map->indir_qp->event = mlx4_en_sqp_event; @@ -1241,6 +1241,7 @@ indir_err: MLX4_QP_STATE_RST, NULL, 0, 0, rss_map->indir_qp); mlx4_qp_remove(mdev->dev, rss_map->indir_qp); mlx4_qp_free(mdev->dev, rss_map->indir_qp); +qp_alloc_err: kfree(rss_map->indir_qp); rss_map->indir_qp = NULL; rss_err: -- cgit v1.2.1 From 
c36757eb9dee13681227ad3676d37f14b3a2b2af Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 12 Aug 2019 21:20:02 +0200 Subject: net: phy: consider AN_RESTART status when reading link status After configuring and restarting aneg we immediately try to read the link status. On some systems the PHY may not yet have cleared the "aneg complete" and "link up" bits, resulting in a false link-up signal. See [0] for a report. Clause 22 and 45 both require the PHY to keep the AN_RESTART bit set until the PHY actually starts auto-negotiation. Let's consider this in the generic functions for reading link status. The commit marked as fixed is the first one where the patch applies cleanly. [0] https://marc.info/?t=156518400300003&r=1&w=2 Fixes: c1164bb1a631 ("net: phy: check PMAPMD link status only in genphy_c45_read_link") Tested-by: Yonglong Liu Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy-c45.c | 14 ++++++++++++++ drivers/net/phy/phy_device.c | 12 +++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index b9d4145781ca..58bb25e4af10 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -219,6 +219,20 @@ int genphy_c45_read_link(struct phy_device *phydev) int val, devad; bool link = true; + if (phydev->c45_ids.devices_in_package & MDIO_DEVS_AN) { + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1); + if (val < 0) + return val; + + /* Autoneg is being started, therefore disregard current + * link status and report link as down. 
+ */ + if (val & MDIO_AN_CTRL1_RESTART) { + phydev->link = 0; + return 0; + } + } + while (mmd_mask && link) { devad = __ffs(mmd_mask); mmd_mask &= ~BIT(devad); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 7ddd91df99e3..27ebc2c6c2d0 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1752,7 +1752,17 @@ EXPORT_SYMBOL(genphy_aneg_done); */ int genphy_update_link(struct phy_device *phydev) { - int status; + int status = 0, bmcr; + + bmcr = phy_read(phydev, MII_BMCR); + if (bmcr < 0) + return bmcr; + + /* Autoneg is being started, therefore disregard BMSR value and + * report link as down. + */ + if (bmcr & BMCR_ANRESTART) + goto done; /* The link state is latched low so that momentary link * drops can be detected. Do not double-read the status -- cgit v1.2.1 From d00ee64e1dcf09b3afefd1340f3e9eb637272714 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 12 Aug 2019 13:07:07 -0700 Subject: netlink: Fix nlmsg_parse as a wrapper for strict message parsing Eric reported a syzbot warning: BUG: KMSAN: uninit-value in nh_valid_get_del_req+0x6f1/0x8c0 net/ipv4/nexthop.c:1510 CPU: 0 PID: 11812 Comm: syz-executor444 Not tainted 5.3.0-rc3+ #17 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x191/0x1f0 lib/dump_stack.c:113 kmsan_report+0x162/0x2d0 mm/kmsan/kmsan_report.c:109 __msan_warning+0x75/0xe0 mm/kmsan/kmsan_instr.c:294 nh_valid_get_del_req+0x6f1/0x8c0 net/ipv4/nexthop.c:1510 rtm_del_nexthop+0x1b1/0x610 net/ipv4/nexthop.c:1543 rtnetlink_rcv_msg+0x115a/0x1580 net/core/rtnetlink.c:5223 netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5241 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0xf6c/0x1050 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x110f/0x1330 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:637 
[inline] sock_sendmsg net/socket.c:657 [inline] ___sys_sendmsg+0x14ff/0x1590 net/socket.c:2311 __sys_sendmmsg+0x53a/0xae0 net/socket.c:2413 __do_sys_sendmmsg net/socket.c:2442 [inline] __se_sys_sendmmsg+0xbd/0xe0 net/socket.c:2439 __x64_sys_sendmmsg+0x56/0x70 net/socket.c:2439 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:297 entry_SYSCALL_64_after_hwframe+0x63/0xe7 The root cause is nlmsg_parse calling __nla_parse which means the header struct size is not checked. nlmsg_parse should be a wrapper around __nlmsg_parse with NL_VALIDATE_STRICT for the validate argument very much like nlmsg_parse_deprecated is for NL_VALIDATE_LIBERAL. Fixes: 3de6440354465 ("netlink: re-add parse/validate functions in strict mode") Reported-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David Ahern Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/netlink.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/net/netlink.h b/include/net/netlink.h index e4650e5b64a1..b140c8f1be22 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -684,9 +684,8 @@ static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, const struct nla_policy *policy, struct netlink_ext_ack *extack) { - return __nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), - nlmsg_attrlen(nlh, hdrlen), policy, - NL_VALIDATE_STRICT, extack); + return __nlmsg_parse(nlh, hdrlen, tb, maxtype, policy, + NL_VALIDATE_STRICT, extack); } /** -- cgit v1.2.1 From 6d5afe20397b478192ed8c38ec0ee10fa3aec649 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Tue, 13 Aug 2019 22:05:50 +0800 Subject: sctp: fix memleak in sctp_send_reset_streams If the stream outq is not empty, need to kfree nstr_list. 
Fixes: d570a59c5b5f ("sctp: only allow the out stream reset when the stream outq is empty") Reported-by: Hulk Robot Signed-off-by: zhengbin Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: Jakub Kicinski --- net/sctp/stream.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 25946604af85..e83cdaa2ab76 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -316,6 +316,7 @@ int sctp_send_reset_streams(struct sctp_association *asoc, nstr_list[i] = htons(str_list[i]); if (out && !sctp_stream_outq_is_empty(stream, str_nums, nstr_list)) { + kfree(nstr_list); retval = -EAGAIN; goto out; } -- cgit v1.2.1 From 2c60e6b5c9241b24b8b523fefd3e44fb85622cda Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 6 Aug 2019 13:41:51 +0200 Subject: gpiolib: never report open-drain/source lines as 'input' to user-space If the driver doesn't support open-drain/source config options, we emulate this behavior when setting the direction by calling gpiod_direction_input() if the default value is 0 (open-source) or 1 (open-drain), thus not actively driving the line in those cases. This however clears the FLAG_IS_OUT bit for the GPIO line descriptor and makes the LINEINFO ioctl() incorrectly report this line's mode as 'input' to user-space. This commit modifies the ioctl() to always set the GPIOLINE_FLAG_IS_OUT bit in the lineinfo structure's flags field. Since it's impossible to use the input mode and open-drain/source options at the same time, we can be sure the reported information will be correct. 
Fixes: 521a2ad6f862 ("gpio: add userspace ABI for GPIO line information") Cc: stable Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20190806114151.17652-1-brgl@bgdev.pl Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index f497003f119c..80a2a2cb673b 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1091,9 +1091,11 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) lineinfo.flags |= GPIOLINE_FLAG_ACTIVE_LOW; if (test_bit(FLAG_OPEN_DRAIN, &desc->flags)) - lineinfo.flags |= GPIOLINE_FLAG_OPEN_DRAIN; + lineinfo.flags |= (GPIOLINE_FLAG_OPEN_DRAIN | + GPIOLINE_FLAG_IS_OUT); if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) - lineinfo.flags |= GPIOLINE_FLAG_OPEN_SOURCE; + lineinfo.flags |= (GPIOLINE_FLAG_OPEN_SOURCE | + GPIOLINE_FLAG_IS_OUT); if (copy_to_user(ip, &lineinfo, sizeof(lineinfo))) return -EFAULT; -- cgit v1.2.1 From 68e03b85474a51ec1921b4d13204782594ef7223 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 31 Jul 2019 20:38:14 +0800 Subject: gpio: Fix build error of function redefinition when do randbuilding, I got this error: In file included from drivers/hwmon/pmbus/ucd9000.c:19:0: ./include/linux/gpio/driver.h:576:1: error: redefinition of gpiochip_add_pin_range gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name, ^~~~~~~~~~~~~~~~~~~~~~ In file included from drivers/hwmon/pmbus/ucd9000.c:18:0: ./include/linux/gpio.h:245:1: note: previous definition of gpiochip_add_pin_range was here gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name, ^~~~~~~~~~~~~~~~~~~~~~ Reported-by: Hulk Robot Fixes: 964cb341882f ("gpio: move pincontrol calls to ") Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20190731123814.46624-1-yuehaibing@huawei.com Signed-off-by: Linus Walleij --- 
include/linux/gpio.h | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 40915b461f18..f757a58191a6 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -241,30 +241,6 @@ static inline int irq_to_gpio(unsigned irq) return -EINVAL; } -static inline int -gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name, - unsigned int gpio_offset, unsigned int pin_offset, - unsigned int npins) -{ - WARN_ON(1); - return -EINVAL; -} - -static inline int -gpiochip_add_pingroup_range(struct gpio_chip *chip, - struct pinctrl_dev *pctldev, - unsigned int gpio_offset, const char *pin_group) -{ - WARN_ON(1); - return -EINVAL; -} - -static inline void -gpiochip_remove_pin_ranges(struct gpio_chip *chip) -{ - WARN_ON(1); -} - static inline int devm_gpio_request(struct device *dev, unsigned gpio, const char *label) { -- cgit v1.2.1 From dfe42be15fde16232340b8b2a57c359f51cc10d9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 13 Aug 2019 17:41:13 +0200 Subject: netfilter: nft_flow_offload: skip tcp rst and fin packets TCP rst and fin packets do not qualify to place a flow into the flowtable. Most likely there will be no more packets after connection closure. Without this patch, this flow entry expires and connection tracking picks up the entry in ESTABLISHED state using the fixup timeout, which makes this look inconsistent to the user for a connection that is actually already closed. 
Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_flow_offload.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index aa5f571d4361..060a4ed46d5e 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -72,11 +72,11 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, { struct nft_flow_offload *priv = nft_expr_priv(expr); struct nf_flowtable *flowtable = &priv->flowtable->data; + struct tcphdr _tcph, *tcph = NULL; enum ip_conntrack_info ctinfo; struct nf_flow_route route; struct flow_offload *flow; enum ip_conntrack_dir dir; - bool is_tcp = false; struct nf_conn *ct; int ret; @@ -89,7 +89,10 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { case IPPROTO_TCP: - is_tcp = true; + tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, + sizeof(_tcph), &_tcph); + if (unlikely(!tcph || tcph->fin || tcph->rst)) + goto out; break; case IPPROTO_UDP: break; @@ -115,7 +118,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, if (!flow) goto err_flow_alloc; - if (is_tcp) { + if (tcph) { ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; } -- cgit v1.2.1 From b00df840fb4004b7087940ac5f68801562d0d2de Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 12 Aug 2019 23:30:06 +0100 Subject: rxrpc: Fix local endpoint replacement When a local endpoint (struct rxrpc_local) ceases to be in use by any AF_RXRPC sockets, it starts the process of being destroyed, but this doesn't cause it to be removed from the namespace endpoint list immediately as tearing it down isn't trivial and can't be done in softirq context, so it gets deferred. 
If a new socket comes along that wants to bind to the same endpoint, a new rxrpc_local object will be allocated and rxrpc_lookup_local() will use list_replace() to substitute the new one for the old. Then, when the dying object gets to rxrpc_local_destroyer(), it is removed unconditionally from whatever list it is on by calling list_del_init(). However, list_replace() doesn't reset the pointers in the replaced list_head and so the list_del_init() will likely corrupt the local endpoints list. Fix this by using list_replace_init() instead. Fixes: 730c5fd42c1e ("rxrpc: Fix local endpoint refcounting") Reported-by: syzbot+193e29e9387ea5837f1d@syzkaller.appspotmail.com Signed-off-by: David Howells --- net/rxrpc/local_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index c9db3e762d8d..c45765b7263e 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -283,7 +283,7 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, goto sock_error; if (cursor != &rxnet->local_endpoints) - list_replace(cursor, &local->link); + list_replace_init(cursor, &local->link); else list_add_tail(&local->link, cursor); age = "new"; -- cgit v1.2.1 From 06d9532fa6b34f12a6d75711162d47c17c1add72 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Aug 2019 22:26:36 +0100 Subject: rxrpc: Fix read-after-free in rxrpc_queue_local() rxrpc_queue_local() attempts to queue the local endpoint it is given and then, if successful, prints a trace line. The trace line includes the current usage count - but we're not allowed to look at the local endpoint at this point as we passed our ref on it to the workqueue. Fix this by reading the usage count before queuing the work item. Also fix the reading of local->debug_id for trace lines, which must be done with the same consideration as reading the usage count. 
Fixes: 09d2bf595db4 ("rxrpc: Add a tracepoint to track rxrpc_local refcounting") Reported-by: syzbot+78e71c5bab4f76a6a719@syzkaller.appspotmail.com Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 6 +++--- net/rxrpc/local_object.c | 19 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index cc1d060cbf13..fa06b528c73c 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -498,10 +498,10 @@ rxrpc_tx_points; #define E_(a, b) { a, b } TRACE_EVENT(rxrpc_local, - TP_PROTO(struct rxrpc_local *local, enum rxrpc_local_trace op, + TP_PROTO(unsigned int local_debug_id, enum rxrpc_local_trace op, int usage, const void *where), - TP_ARGS(local, op, usage, where), + TP_ARGS(local_debug_id, op, usage, where), TP_STRUCT__entry( __field(unsigned int, local ) @@ -511,7 +511,7 @@ TRACE_EVENT(rxrpc_local, ), TP_fast_assign( - __entry->local = local->debug_id; + __entry->local = local_debug_id; __entry->op = op; __entry->usage = usage; __entry->where = where; diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index c45765b7263e..72a6e12a9304 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -93,7 +93,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, local->debug_id = atomic_inc_return(&rxrpc_debug_id); memcpy(&local->srx, srx, sizeof(*srx)); local->srx.srx_service = 0; - trace_rxrpc_local(local, rxrpc_local_new, 1, NULL); + trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, NULL); } _leave(" = %p", local); @@ -321,7 +321,7 @@ struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local) int n; n = atomic_inc_return(&local->usage); - trace_rxrpc_local(local, rxrpc_local_got, n, here); + trace_rxrpc_local(local->debug_id, rxrpc_local_got, n, here); return local; } @@ -335,7 +335,8 @@ struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local) if (local) { int n = 
atomic_fetch_add_unless(&local->usage, 1, 0); if (n > 0) - trace_rxrpc_local(local, rxrpc_local_got, n + 1, here); + trace_rxrpc_local(local->debug_id, rxrpc_local_got, + n + 1, here); else local = NULL; } @@ -343,16 +344,16 @@ struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local) } /* - * Queue a local endpoint unless it has become unreferenced and pass the - * caller's reference to the work item. + * Queue a local endpoint and pass the caller's reference to the work item. */ void rxrpc_queue_local(struct rxrpc_local *local) { const void *here = __builtin_return_address(0); + unsigned int debug_id = local->debug_id; + int n = atomic_read(&local->usage); if (rxrpc_queue_work(&local->processor)) - trace_rxrpc_local(local, rxrpc_local_queued, - atomic_read(&local->usage), here); + trace_rxrpc_local(debug_id, rxrpc_local_queued, n, here); else rxrpc_put_local(local); } @@ -367,7 +368,7 @@ void rxrpc_put_local(struct rxrpc_local *local) if (local) { n = atomic_dec_return(&local->usage); - trace_rxrpc_local(local, rxrpc_local_put, n, here); + trace_rxrpc_local(local->debug_id, rxrpc_local_put, n, here); if (n == 0) call_rcu(&local->rcu, rxrpc_local_rcu); @@ -456,7 +457,7 @@ static void rxrpc_local_processor(struct work_struct *work) container_of(work, struct rxrpc_local, processor); bool again; - trace_rxrpc_local(local, rxrpc_local_processing, + trace_rxrpc_local(local->debug_id, rxrpc_local_processing, atomic_read(&local->usage), NULL); do { -- cgit v1.2.1 From 6600c0808e2ea2939009e53983f066fe38bd308a Mon Sep 17 00:00:00 2001 From: Rocky Liao Date: Wed, 14 Aug 2019 15:42:39 +0800 Subject: Bluetooth: hci_qca: Skip 1 error print in device_want_to_sleep() Don't fall through to print error message when receive sleep indication in HCI_IBS_RX_ASLEEP state, this is allowed behavior. 
Signed-off-by: Rocky Liao Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_qca.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 0cfa5b831d39..9a970fd1975a 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -705,7 +705,7 @@ static void device_want_to_sleep(struct hci_uart *hu) unsigned long flags; struct qca_data *qca = hu->priv; - BT_DBG("hu %p want to sleep", hu); + BT_DBG("hu %p want to sleep in %d state", hu, qca->rx_ibs_state); spin_lock_irqsave(&qca->hci_ibs_lock, flags); @@ -720,7 +720,7 @@ static void device_want_to_sleep(struct hci_uart *hu) break; case HCI_IBS_RX_ASLEEP: - /* Fall through */ + break; default: /* Any other state is illegal */ -- cgit v1.2.1 From b14c876b994f208b6b95c222056e1deb0a45de0e Mon Sep 17 00:00:00 2001 From: Radim Krcmar Date: Tue, 13 Aug 2019 23:37:37 -0400 Subject: kvm: x86: skip populating logical dest map if apic is not sw enabled recalculate_apic_map does not sanitize ldr and it's possible that multiple bits are set. In that case, a previous valid entry can potentially be overwritten by an invalid one. This condition is hit when booting a 32 bit, >8 CPU, RHEL6 guest and then triggering a crash to boot a kdump kernel. This is the sequence of events: 1. Linux boots in bigsmp mode and enables PhysFlat, however, it still writes to the LDR which probably will never be used. 2. However, when booting into kdump, the stale LDR values remain as they are not cleared by the guest and there isn't an apic reset. 3. kdump boots with 1 cpu, and uses Logical Destination Mode but the logical map has been overwritten and points to an inactive vcpu. 
Signed-off-by: Radim Krcmar Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 685d17c11461..e904ff06a83d 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -216,6 +216,9 @@ static void recalculate_apic_map(struct kvm *kvm) if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id]) new->phys_map[xapic_id] = apic; + if (!kvm_apic_sw_enabled(apic)) + continue; + ldr = kvm_lapic_get_reg(apic, APIC_LDR); if (apic_x2apic_mode(apic)) { @@ -258,6 +261,8 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) static_key_slow_dec_deferred(&apic_sw_disabled); else static_key_slow_inc(&apic_sw_disabled.key); + + recalculate_apic_map(apic->vcpu->kvm); } } -- cgit v1.2.1 From 74260dc278a725b692b1a201c6b780a02804d3e4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 9 Aug 2019 09:18:43 +0200 Subject: MAINTAINERS: change list for KVM/s390 KVM/s390 does not have a list of its own, and linux-s390 is in the loop anyway thanks to the generic arch/s390 match. So use the generic KVM list for s390 patches. Signed-off-by: Paolo Bonzini --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 5e1f9ee8f86f..05c107b168c0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8869,7 +8869,7 @@ M: Christian Borntraeger M: Janosch Frank R: David Hildenbrand R: Cornelia Huck -L: linux-s390@vger.kernel.org +L: kvm@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git S: Supported -- cgit v1.2.1 From ed4e7b057e9e75cecd56f6c3434f88eaa69c1209 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 9 Aug 2019 09:30:02 +0200 Subject: MAINTAINERS: add KVM x86 reviewers This is probably overdue---KVM x86 has quite a few contributors that usually review each other's patches, which is really helpful to me. 
Formalize this by listing them as reviewers. I am including people with various expertise: - Joerg for SVM (with designated reviewers, it makes more sense to have him in the main KVM/x86 stanza) - Sean for MMU and VMX - Jim for VMX - Vitaly for Hyper-V and possibly SVM - Wanpeng for LAPIC and paravirtualization. Please ack if you are okay with this arrangement, otherwise speak up. In other news, Radim is going to leave Red Hat soon. However, he has not been very much involved in upstream KVM development for some time, and in the immediate future he is still going to help maintain kvm/queue while I am on vacation. Since not much is going to change, I will let him decide whether he wants to keep the maintainer role after he leaves. Acked-by: Joerg Roedel Acked-by: Vitaly Kuznetsov Acked-by: Wanpeng Li Cc: Sean Christopherson Cc: Jim Mattson Signed-off-by: Paolo Bonzini --- MAINTAINERS | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 05c107b168c0..166e765acce8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8819,14 +8819,6 @@ F: virt/kvm/* F: tools/kvm/ F: tools/testing/selftests/kvm/ -KERNEL VIRTUAL MACHINE FOR AMD-V (KVM/amd) -M: Joerg Roedel -L: kvm@vger.kernel.org -W: http://www.linux-kvm.org/ -S: Maintained -F: arch/x86/include/asm/svm.h -F: arch/x86/kvm/svm.c - KERNEL VIRTUAL MACHINE FOR ARM/ARM64 (KVM/arm, KVM/arm64) M: Marc Zyngier R: James Morse @@ -8884,6 +8876,11 @@ F: tools/testing/selftests/kvm/*/s390x/ KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86) M: Paolo Bonzini M: Radim Krčmář +R: Sean Christopherson +R: Vitaly Kuznetsov +R: Wanpeng Li +R: Jim Mattson +R: Joerg Roedel L: kvm@vger.kernel.org W: http://www.linux-kvm.org T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git @@ -8891,8 +8888,12 @@ S: Supported F: arch/x86/kvm/ F: arch/x86/kvm/*/ F: arch/x86/include/uapi/asm/kvm* +F: arch/x86/include/uapi/asm/vmx.h +F: arch/x86/include/uapi/asm/svm.h F: arch/x86/include/asm/kvm* F: 
arch/x86/include/asm/pvclock-abi.h +F: arch/x86/include/asm/svm.h +F: arch/x86/include/asm/vmx.h F: arch/x86/kernel/kvm.c F: arch/x86/kernel/kvmclock.c -- cgit v1.2.1 From c8e174b39887ea1992286ff8ffdbcf79f6057cf2 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 12 Aug 2019 10:33:00 +0800 Subject: KVM: x86: svm: remove redundant assignment of var new_entry new_entry is reassigned a new value on the next line, so the first assignment is redundant; remove it. Signed-off-by: Miaohe Lin Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d685491fce4d..e3d3b2128f2b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1714,7 +1714,6 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) if (!entry) return -EINVAL; - new_entry = READ_ONCE(*entry); new_entry = __sme_set((page_to_phys(svm->avic_backing_page) & AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) | AVIC_PHYSICAL_ID_ENTRY_VALID_MASK); -- cgit v1.2.1 From c7ec75ea4d5316518adc87224e3cff47192579e7 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Wed, 14 Aug 2019 10:30:14 -0500 Subject: clk: socfpga: stratix10: fix rate calculation for cnt_clks Checking bypass_reg is incorrect for calculating the cnt_clk rates. Instead we should be checking that there is a proper hardware register that holds the clock divider. 
Cc: stable@vger.kernel.org Signed-off-by: Dinh Nguyen Link: https://lkml.kernel.org/r/20190814153014.12962-1-dinguyen@kernel.org Signed-off-by: Stephen Boyd --- drivers/clk/socfpga/clk-periph-s10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/socfpga/clk-periph-s10.c b/drivers/clk/socfpga/clk-periph-s10.c index 5c50e723ecae..1a191eeeebba 100644 --- a/drivers/clk/socfpga/clk-periph-s10.c +++ b/drivers/clk/socfpga/clk-periph-s10.c @@ -38,7 +38,7 @@ static unsigned long clk_peri_cnt_clk_recalc_rate(struct clk_hw *hwclk, if (socfpgaclk->fixed_div) { div = socfpgaclk->fixed_div; } else { - if (!socfpgaclk->bypass_reg) + if (socfpgaclk->hw.reg) div = ((readl(socfpgaclk->hw.reg) & 0x7ff) + 1); } -- cgit v1.2.1 From 3ee1bb7aae97324ec9078da1f00cb2176919563f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 Aug 2019 04:57:27 -0700 Subject: batman-adv: fix uninit-value in batadv_netlink_get_ifindex() batadv_netlink_get_ifindex() needs to make sure user passed a correct u32 attribute. 
syzbot reported : BUG: KMSAN: uninit-value in batadv_netlink_dump_hardif+0x70d/0x880 net/batman-adv/netlink.c:968 CPU: 1 PID: 11705 Comm: syz-executor888 Not tainted 5.1.0+ #1 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x191/0x1f0 lib/dump_stack.c:113 kmsan_report+0x130/0x2a0 mm/kmsan/kmsan.c:622 __msan_warning+0x75/0xe0 mm/kmsan/kmsan_instr.c:310 batadv_netlink_dump_hardif+0x70d/0x880 net/batman-adv/netlink.c:968 genl_lock_dumpit+0xc6/0x130 net/netlink/genetlink.c:482 netlink_dump+0xa84/0x1ab0 net/netlink/af_netlink.c:2253 __netlink_dump_start+0xa3a/0xb30 net/netlink/af_netlink.c:2361 genl_family_rcv_msg net/netlink/genetlink.c:550 [inline] genl_rcv_msg+0xfc1/0x1a40 net/netlink/genetlink.c:627 netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2486 genl_rcv+0x63/0x80 net/netlink/genetlink.c:638 netlink_unicast_kernel net/netlink/af_netlink.c:1311 [inline] netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1337 netlink_sendmsg+0x127e/0x12f0 net/netlink/af_netlink.c:1926 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg net/socket.c:661 [inline] ___sys_sendmsg+0xcc6/0x1200 net/socket.c:2260 __sys_sendmsg net/socket.c:2298 [inline] __do_sys_sendmsg net/socket.c:2307 [inline] __se_sys_sendmsg+0x305/0x460 net/socket.c:2305 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2305 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x440209 Fixes: b60620cf567b ("batman-adv: netlink: hardif query") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 6f08fd122a8d..7e052d6f759b 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -164,7 +164,7 @@ batadv_netlink_get_ifindex(const struct 
nlmsghdr *nlh, int attrtype) { struct nlattr *attr = nlmsg_find_attr(nlh, GENL_HDRLEN, attrtype); - return attr ? nla_get_u32(attr) : 0; + return (attr && nla_len(attr) == sizeof(u32)) ? nla_get_u32(attr) : 0; } /** -- cgit v1.2.1 From 27df5c7068bf23cab282dc64b1c9894429b3b8a0 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 14 Aug 2019 12:41:09 +0200 Subject: selftests/bpf: fix "bind{4, 6} deny specific IP & port" on s390 "bind4 allow specific IP & port" and "bind6 deny specific IP & port" fail on s390 because of endianness issue: the 4 IP address bytes are loaded as a word and compared with a constant, but the value of this constant should be different on big- and little- endian machines, which is not the case right now. Use __bpf_constant_ntohl to generate proper value based on machine endianness. Fixes: 1d436885b23b ("selftests/bpf: Selftest for sys_bind post-hooks.") Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_sock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c index fb679ac3d4b0..0e6652733462 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ -13,6 +13,7 @@ #include #include "cgroup_helpers.h" +#include "bpf_endian.h" #include "bpf_rlimit.h" #include "bpf_util.h" @@ -232,7 +233,8 @@ static struct sock_test tests[] = { /* if (ip == expected && port == expected) */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock, src_ip6[3])), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x01000000, 4), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, + __bpf_constant_ntohl(0x00000001), 4), BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock, src_port)), BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2), @@ -261,7 +263,8 @@ static struct sock_test tests[] = { /* if (ip == expected && port == expected) */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, 
offsetof(struct bpf_sock, src_ip4)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x0100007F, 4), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, + __bpf_constant_ntohl(0x7F000001), 4), BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock, src_port)), BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2), -- cgit v1.2.1 From 4c6f3196e6ea111c456c6086dc3f57d4706b0b2d Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 29 Jul 2019 14:33:34 +0900 Subject: drm/mediatek: use correct device to import PRIME buffers PRIME buffers should be imported using the DMA device. To this end, use a custom import function that mimics drm_gem_prime_import_dev(), but passes the correct device. Fixes: 119f5173628aa ("drm/mediatek: Add DRM Driver for Mediatek SoC MT8173.") Signed-off-by: Alexandre Courbot Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 95fdbd0fbcac..8b18a00a58c7 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -320,6 +320,18 @@ static const struct file_operations mtk_drm_fops = { .compat_ioctl = drm_compat_ioctl, }; +/* + * We need to override this because the device used to import the memory is + * not dev->dev, as drm_gem_prime_import() expects. 
+ */ +struct drm_gem_object *mtk_drm_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + struct mtk_drm_private *private = dev->dev_private; + + return drm_gem_prime_import_dev(dev, dma_buf, private->dma_dev); +} + static struct drm_driver mtk_drm_driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME | DRIVER_ATOMIC, @@ -331,7 +343,7 @@ static struct drm_driver mtk_drm_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_export = drm_gem_prime_export, - .gem_prime_import = drm_gem_prime_import, + .gem_prime_import = mtk_drm_gem_prime_import, .gem_prime_get_sg_table = mtk_gem_prime_get_sg_table, .gem_prime_import_sg_table = mtk_gem_prime_import_sg_table, .gem_prime_mmap = mtk_drm_gem_mmap_buf, -- cgit v1.2.1 From 070955558e820b9a89c570b91b1f21762f62b288 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 29 Jul 2019 14:33:35 +0900 Subject: drm/mediatek: set DMA max segment size This driver requires imported PRIME buffers to appear contiguously in its IO address space. Make sure this is the case by setting the maximum DMA segment size to a more suitable value than the default 64KB. 
Signed-off-by: Alexandre Courbot Reviewed-by: Tomasz Figa Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 35 ++++++++++++++++++++++++++++++++-- drivers/gpu/drm/mediatek/mtk_drm_drv.h | 2 ++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 8b18a00a58c7..c021d4c8324f 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -213,6 +213,7 @@ static int mtk_drm_kms_init(struct drm_device *drm) struct mtk_drm_private *private = drm->dev_private; struct platform_device *pdev; struct device_node *np; + struct device *dma_dev; int ret; if (!iommu_present(&platform_bus_type)) @@ -275,7 +276,29 @@ static int mtk_drm_kms_init(struct drm_device *drm) goto err_component_unbind; } - private->dma_dev = &pdev->dev; + dma_dev = &pdev->dev; + private->dma_dev = dma_dev; + + /* + * Configure the DMA segment size to make sure we get contiguous IOVA + * when importing PRIME buffers. 
+ */ + if (!dma_dev->dma_parms) { + private->dma_parms_allocated = true; + dma_dev->dma_parms = + devm_kzalloc(drm->dev, sizeof(*dma_dev->dma_parms), + GFP_KERNEL); + } + if (!dma_dev->dma_parms) { + ret = -ENOMEM; + goto err_component_unbind; + } + + ret = dma_set_max_seg_size(dma_dev, (unsigned int)DMA_BIT_MASK(32)); + if (ret) { + dev_err(dma_dev, "Failed to set DMA segment size\n"); + goto err_unset_dma_parms; + } /* * We don't use the drm_irq_install() helpers provided by the DRM @@ -285,13 +308,16 @@ static int mtk_drm_kms_init(struct drm_device *drm) drm->irq_enabled = true; ret = drm_vblank_init(drm, MAX_CRTC); if (ret < 0) - goto err_component_unbind; + goto err_unset_dma_parms; drm_kms_helper_poll_init(drm); drm_mode_config_reset(drm); return 0; +err_unset_dma_parms: + if (private->dma_parms_allocated) + dma_dev->dma_parms = NULL; err_component_unbind: component_unbind_all(drm->dev, drm); err_config_cleanup: @@ -302,9 +328,14 @@ err_config_cleanup: static void mtk_drm_kms_deinit(struct drm_device *drm) { + struct mtk_drm_private *private = drm->dev_private; + drm_kms_helper_poll_fini(drm); drm_atomic_helper_shutdown(drm); + if (private->dma_parms_allocated) + private->dma_dev->dma_parms = NULL; + component_unbind_all(drm->dev, drm); drm_mode_config_cleanup(drm); } diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.h b/drivers/gpu/drm/mediatek/mtk_drm_drv.h index 598ff3e70446..e03fea12ff59 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.h @@ -51,6 +51,8 @@ struct mtk_drm_private { } commit; struct drm_atomic_state *suspend_state; + + bool dma_parms_allocated; }; extern struct platform_driver mtk_ddp_driver; -- cgit v1.2.1 From 26fa656e9a0cbccddf7db132ea020d2169dbe46e Mon Sep 17 00:00:00 2001 From: Bill Kuzeja Date: Wed, 14 Aug 2019 10:24:41 -0400 Subject: scsi: qla2xxx: Fix gnl.l memory leak on adapter init failure If HBA initialization fails unexpectedly (exiting via probe_failed:), we may fail to free 
vha->gnl.l. So that we don't attempt to double free, set this pointer to NULL after a free and check for NULL at probe_failed: so we know whether or not to call dma_free_coherent. Signed-off-by: Bill Kuzeja Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_attr.c | 2 ++ drivers/scsi/qla2xxx/qla_os.c | 11 ++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 8d560c562e9c..6b7b390b2e52 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2956,6 +2956,8 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) dma_free_coherent(&ha->pdev->dev, vha->gnl.size, vha->gnl.l, vha->gnl.ldma); + vha->gnl.l = NULL; + vfree(vha->scan.l); if (vha->qpair && vha->qpair->vp_idx == vha->vp_idx) { diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 2e58cff9d200..98e60a34afd9 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3440,6 +3440,12 @@ skip_dpc: return 0; probe_failed: + if (base_vha->gnl.l) { + dma_free_coherent(&ha->pdev->dev, base_vha->gnl.size, + base_vha->gnl.l, base_vha->gnl.ldma); + base_vha->gnl.l = NULL; + } + if (base_vha->timer_active) qla2x00_stop_timer(base_vha); base_vha->flags.online = 0; @@ -3673,7 +3679,7 @@ qla2x00_remove_one(struct pci_dev *pdev) if (!atomic_read(&pdev->enable_cnt)) { dma_free_coherent(&ha->pdev->dev, base_vha->gnl.size, base_vha->gnl.l, base_vha->gnl.ldma); - + base_vha->gnl.l = NULL; scsi_host_put(base_vha->host); kfree(ha); pci_set_drvdata(pdev, NULL); @@ -3713,6 +3719,8 @@ qla2x00_remove_one(struct pci_dev *pdev) dma_free_coherent(&ha->pdev->dev, base_vha->gnl.size, base_vha->gnl.l, base_vha->gnl.ldma); + base_vha->gnl.l = NULL; + vfree(base_vha->scan.l); if (IS_QLAFX00(ha)) @@ -4816,6 +4824,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, "Alloc failed for scan database.\n"); 
dma_free_coherent(&ha->pdev->dev, vha->gnl.size, vha->gnl.l, vha->gnl.ldma); + vha->gnl.l = NULL; scsi_remove_host(vha->host); return NULL; } -- cgit v1.2.1 From a86a75865ff4d8c05f355d1750a5250aec89ab15 Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Sun, 11 Aug 2019 11:25:10 -0700 Subject: scsi: target: tcmu: avoid use-after-free after command timeout In tcmu_handle_completion() function, the variable called read_len is always initialized with a value taken from se_cmd structure. If this function is called to complete an expired (timed out) command, the session command pointed to by se_cmd is likely to be already deallocated by the target core at that moment. As a result, this access triggers a use-after-free warning from KASAN. This patch fixes the code not to touch se_cmd when completing timed out TCMU commands. It also resets the pointer to se_cmd at the time when the TCMU_CMD_BIT_EXPIRED flag is set because it is going to become invalid after calling target_complete_cmd() later in the same function, tcmu_check_expired_cmd(). Signed-off-by: Dmitry Fomichev Acked-by: Mike Christie Reviewed-by: Damien Le Moal Signed-off-by: Martin K. 
Petersen --- drivers/target/target_core_user.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 04eda111920e..661bb9358364 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1132,14 +1132,16 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * struct se_cmd *se_cmd = cmd->se_cmd; struct tcmu_dev *udev = cmd->tcmu_dev; bool read_len_valid = false; - uint32_t read_len = se_cmd->data_length; + uint32_t read_len; /* * cmd has been completed already from timeout, just reclaim * data area space and free cmd */ - if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) + if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) { + WARN_ON_ONCE(se_cmd); goto out; + } list_del_init(&cmd->queue_entry); @@ -1152,6 +1154,7 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * goto done; } + read_len = se_cmd->data_length; if (se_cmd->data_direction == DMA_FROM_DEVICE && (entry->hdr.uflags & TCMU_UFLAG_READ_LEN) && entry->rsp.read_len) { read_len_valid = true; @@ -1307,6 +1310,7 @@ static int tcmu_check_expired_cmd(int id, void *p, void *data) */ scsi_status = SAM_STAT_CHECK_CONDITION; list_del_init(&cmd->queue_entry); + cmd->se_cmd = NULL; } else { list_del_init(&cmd->queue_entry); idr_remove(&udev->commands, id); @@ -2022,6 +2026,7 @@ static void tcmu_reset_ring(struct tcmu_dev *udev, u8 err_level) idr_remove(&udev->commands, i); if (!test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) { + WARN_ON(!cmd->se_cmd); list_del_init(&cmd->queue_entry); if (err_level == 1) { /* -- cgit v1.2.1 From 7c7cfdcf7f1777c7376fc9a239980de04b6b5ea1 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 14 Aug 2019 15:59:50 +0300 Subject: scsi: ufs: Fix NULL pointer dereference in ufshcd_config_vreg_hpm() Fix the following BUG: [ 187.065689] BUG: kernel NULL pointer dereference, address: 000000000000001c [ 187.065790] 
RIP: 0010:ufshcd_vreg_set_hpm+0x3c/0x110 [ufshcd_core] [ 187.065938] Call Trace: [ 187.065959] ufshcd_resume+0x72/0x290 [ufshcd_core] [ 187.065980] ufshcd_system_resume+0x54/0x140 [ufshcd_core] [ 187.065993] ? pci_pm_restore+0xb0/0xb0 [ 187.066005] ufshcd_pci_resume+0x15/0x20 [ufshcd_pci] [ 187.066017] pci_pm_thaw+0x4c/0x90 [ 187.066030] dpm_run_callback+0x5b/0x150 [ 187.066043] device_resume+0x11b/0x220 Voltage regulators are optional, so functions must check they exist before dereferencing. Note this issue is hidden if CONFIG_REGULATORS is not set, because the offending code is optimised away. Notes for stable: The issue first appears in commit 57d104c153d3 ("ufs: add UFS power management support") but is inadvertently fixed in commit 60f0187031c0 ("scsi: ufs: disable vccq if it's not needed by UFS device") which in turn was reverted by commit 730679817d83 ("Revert "scsi: ufs: disable vccq if it's not needed by UFS device""). So fix applies v3.18 to v4.5 and v5.1+ Fixes: 57d104c153d3 ("ufs: add UFS power management support") Fixes: 730679817d83 ("Revert "scsi: ufs: disable vccq if it's not needed by UFS device"") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Signed-off-by: Martin K. 
Petersen --- drivers/scsi/ufs/ufshcd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index e274053109d0..029da74bb2f5 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -7062,6 +7062,9 @@ static inline int ufshcd_config_vreg_lpm(struct ufs_hba *hba, static inline int ufshcd_config_vreg_hpm(struct ufs_hba *hba, struct ufs_vreg *vreg) { + if (!vreg) + return 0; + return ufshcd_config_vreg_load(hba->dev, vreg, vreg->max_uA); } -- cgit v1.2.1 From c554336efa9bbc28d6ec14efbee3c7d63c61a34f Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Tue, 13 Aug 2019 04:18:52 -0500 Subject: cxgb4: fix a memory leak bug In blocked_fl_write(), 't' is not deallocated if bitmap_parse_user() fails, leading to a memory leak bug. To fix this issue, free t before returning the error. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 02959035ed3f..d692251ee252 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -3236,8 +3236,10 @@ static ssize_t blocked_fl_write(struct file *filp, const char __user *ubuf, return -ENOMEM; err = bitmap_parse_user(ubuf, count, t, adap->sge.egr_sz); - if (err) + if (err) { + kvfree(t); return err; + } bitmap_copy(adap->sge.blocked_fl, t, adap->sge.egr_sz); kvfree(t); -- cgit v1.2.1 From 92cd0f0be3d7adb63611c28693ec0399beded837 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 Aug 2019 18:18:55 +0200 Subject: selftests: kvm: do not try running the VM in vmx_set_nested_state_test This test is only covering various edge cases of the KVM_SET_NESTED_STATE ioctl. Running the VM does not really add anything. 
Signed-off-by: Paolo Bonzini --- .../selftests/kvm/x86_64/vmx_set_nested_state_test.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c index ed7218d166da..a99fc66dafeb 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c @@ -27,22 +27,13 @@ void test_nested_state(struct kvm_vm *vm, struct kvm_nested_state *state) { - volatile struct kvm_run *run; - vcpu_nested_state_set(vm, VCPU_ID, state, false); - run = vcpu_state(vm, VCPU_ID); - vcpu_run(vm, VCPU_ID); - TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, - "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s),\n", - run->exit_reason, - exit_reason_str(run->exit_reason)); } void test_nested_state_expect_errno(struct kvm_vm *vm, struct kvm_nested_state *state, int expected_errno) { - volatile struct kvm_run *run; int rv; rv = vcpu_nested_state_set(vm, VCPU_ID, state, true); @@ -50,12 +41,6 @@ void test_nested_state_expect_errno(struct kvm_vm *vm, "Expected %s (%d) from vcpu_nested_state_set but got rv: %i errno: %s (%d)", strerror(expected_errno), expected_errno, rv, strerror(errno), errno); - run = vcpu_state(vm, VCPU_ID); - vcpu_run(vm, VCPU_ID); - TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, - "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s),\n", - run->exit_reason, - exit_reason_str(run->exit_reason)); } void test_nested_state_expect_einval(struct kvm_vm *vm, -- cgit v1.2.1 From 65efa61dc0d536d5f0602c33ee805a57cc07e9dc Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 Aug 2019 12:02:41 -0400 Subject: selftests: kvm: provide common function to enable eVMCS There are two tests already enabling eVMCS and a third is coming. Add a function that enables the capability and tests the result. 
Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/evmcs.h | 2 ++ tools/testing/selftests/kvm/lib/x86_64/vmx.c | 20 ++++++++++++++++++++ tools/testing/selftests/kvm/x86_64/evmcs_test.c | 15 ++------------- tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c | 12 ++++-------- 4 files changed, 28 insertions(+), 21 deletions(-) diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/evmcs.h index 4059014d93ea..4912d23844bc 100644 --- a/tools/testing/selftests/kvm/include/evmcs.h +++ b/tools/testing/selftests/kvm/include/evmcs.h @@ -220,6 +220,8 @@ struct hv_enlightened_vmcs { struct hv_enlightened_vmcs *current_evmcs; struct hv_vp_assist_page *current_vp_assist; +int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id); + static inline int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) { u64 val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 204f847bd065..9cef0455b819 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -12,6 +12,26 @@ bool enable_evmcs; +int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id) +{ + uint16_t evmcs_ver; + + struct kvm_enable_cap enable_evmcs_cap = { + .cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS, + .args[0] = (unsigned long)&evmcs_ver + }; + + vcpu_ioctl(vm, vcpu_id, KVM_ENABLE_CAP, &enable_evmcs_cap); + + /* KVM should return supported EVMCS version range */ + TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) && + (evmcs_ver & 0xff) > 0, + "Incorrect EVMCS version range: %x:%x\n", + evmcs_ver & 0xff, evmcs_ver >> 8); + + return evmcs_ver; +} + /* Allocate memory regions for nested VMX tests. 
* * Input Args: diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index f95c08343b48..92915e6408e7 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -79,11 +79,6 @@ int main(int argc, char *argv[]) struct kvm_x86_state *state; struct ucall uc; int stage; - uint16_t evmcs_ver; - struct kvm_enable_cap enable_evmcs_cap = { - .cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS, - .args[0] = (unsigned long)&evmcs_ver - }; /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); @@ -96,13 +91,7 @@ int main(int argc, char *argv[]) exit(KSFT_SKIP); } - vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap); - - /* KVM should return supported EVMCS version range */ - TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) && - (evmcs_ver & 0xff) > 0, - "Incorrect EVMCS version range: %x:%x\n", - evmcs_ver & 0xff, evmcs_ver >> 8); + vcpu_enable_evmcs(vm, VCPU_ID); run = vcpu_state(vm, VCPU_ID); @@ -146,7 +135,7 @@ int main(int argc, char *argv[]) kvm_vm_restart(vm, O_RDWR); vm_vcpu_add(vm, VCPU_ID); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap); + vcpu_enable_evmcs(vm, VCPU_ID); vcpu_load_state(vm, VCPU_ID, state); run = vcpu_state(vm, VCPU_ID); free(state); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index f72b3043db0e..ee59831fbc98 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -18,6 +18,7 @@ #include "test_util.h" #include "kvm_util.h" #include "processor.h" +#include "vmx.h" #define VCPU_ID 0 @@ -106,12 +107,7 @@ int main(int argc, char *argv[]) { struct kvm_vm *vm; int rv; - uint16_t evmcs_ver; struct kvm_cpuid2 *hv_cpuid_entries; - struct kvm_enable_cap enable_evmcs_cap = { - .cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS, - .args[0] = 
(unsigned long)&evmcs_ver - }; /* Tell stdout not to buffer its content */ setbuf(stdout, NULL); @@ -136,14 +132,14 @@ int main(int argc, char *argv[]) free(hv_cpuid_entries); - rv = _vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap); - - if (rv) { + if (!kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { fprintf(stderr, "Enlightened VMCS is unsupported, skip related test\n"); goto vm_free; } + vcpu_enable_evmcs(vm, VCPU_ID); + hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm); if (!hv_cpuid_entries) return 1; -- cgit v1.2.1 From c930e19790bbbff31c018009907c813fa0925f63 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 Aug 2019 12:07:34 -0400 Subject: selftests: kvm: fix vmx_set_nested_state_test vmx_set_nested_state_test is trying to use the KVM_STATE_NESTED_EVMCS without enabling enlightened VMCS first. Correct the outcome of the test, and actually test that it succeeds after the capability is enabled. Signed-off-by: Paolo Bonzini --- .../selftests/kvm/x86_64/vmx_set_nested_state_test.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c index a99fc66dafeb..853e370e8a39 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c @@ -25,6 +25,8 @@ #define VMCS12_REVISION 0x11e57ed0 #define VCPU_ID 5 +bool have_evmcs; + void test_nested_state(struct kvm_vm *vm, struct kvm_nested_state *state) { vcpu_nested_state_set(vm, VCPU_ID, state, false); @@ -75,8 +77,9 @@ void set_default_vmx_state(struct kvm_nested_state *state, int size) { memset(state, 0, size); state->flags = KVM_STATE_NESTED_GUEST_MODE | - KVM_STATE_NESTED_RUN_PENDING | - KVM_STATE_NESTED_EVMCS; + KVM_STATE_NESTED_RUN_PENDING; + if (have_evmcs) + state->flags |= KVM_STATE_NESTED_EVMCS; state->format = 0; state->size = size; 
state->hdr.vmx.vmxon_pa = 0x1000; @@ -126,13 +129,19 @@ void test_vmx_nested_state(struct kvm_vm *vm) /* * Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without * setting the nested state but flags other than eVMCS must be clear. + * The eVMCS flag can be set if the enlightened VMCS capability has + * been enabled. */ set_default_vmx_state(state, state_sz); state->hdr.vmx.vmxon_pa = -1ull; state->hdr.vmx.vmcs12_pa = -1ull; test_nested_state_expect_einval(vm, state); - state->flags = KVM_STATE_NESTED_EVMCS; + state->flags &= KVM_STATE_NESTED_EVMCS; + if (have_evmcs) { + test_nested_state_expect_einval(vm, state); + vcpu_enable_evmcs(vm, VCPU_ID); + } test_nested_state(vm, state); /* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */ @@ -217,6 +226,8 @@ int main(int argc, char *argv[]) struct kvm_nested_state state; struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); + have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS); + if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) { printf("KVM_CAP_NESTED_STATE not available, skipping test\n"); exit(KSFT_SKIP); -- cgit v1.2.1 From ad54567ad5d8e938ee6cf02e4f3867f18835ae6e Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 1 Aug 2019 18:01:17 -0400 Subject: PCI: Reset both NVIDIA GPU and HDA in ThinkPad P50 workaround quirk_reset_lenovo_thinkpad_50_nvgpu() resets NVIDIA GPUs to work around an apparent BIOS defect. It previously used pci_reset_function(), and the available method was a bus reset, which was fine because there was only one function on the bus. After b516ea586d71 ("PCI: Enable NVIDIA HDA controllers"), there are now two functions (the HDA controller and the GPU itself) on the bus, so the reset fails. Use pci_reset_bus() explicitly instead of pci_reset_function() since it's OK to reset both devices. 
[bhelgaas: commit log, add e0547c81bfcf] Fixes: b516ea586d71 ("PCI: Enable NVIDIA HDA controllers") Fixes: e0547c81bfcf ("PCI: Reset Lenovo ThinkPad P50 nvgpu at boot if necessary") Link: https://lore.kernel.org/r/20190801220117.14952-1-lyude@redhat.com Signed-off-by: Lyude Paul Signed-off-by: Bjorn Helgaas Acked-by: Ben Skeggs Cc: Lukas Wunner Cc: Daniel Drake Cc: Aaron Plattner Cc: Peter Wu Cc: Ilia Mirkin Cc: Karol Herbst Cc: Maik Freudenberg --- drivers/pci/quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 208aacf39329..44c4ae1abd00 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5256,7 +5256,7 @@ static void quirk_reset_lenovo_thinkpad_p50_nvgpu(struct pci_dev *pdev) */ if (ioread32(map + 0x2240c) & 0x2) { pci_info(pdev, FW_BUG "GPU left initialized by EFI, resetting\n"); - ret = pci_reset_function(pdev); + ret = pci_reset_bus(pdev); if (ret < 0) pci_err(pdev, "Failed to reset GPU: %d\n", ret); } -- cgit v1.2.1 From 7bafda88de20b2990461d253c5475007436e355c Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 15 Aug 2019 12:12:38 -0500 Subject: Documentation PCI: Fix pciebus-howto.rst filename typo 2e6422444894 ("Documentation: PCI: convert PCIEBUS-HOWTO.txt to reST") incorrectly renamed PCIEBUS-HOWTO.txt to picebus-howto.rst. Rename it to pciebus-howto.rst. 
Fixes: 2e6422444894 ("Documentation: PCI: convert PCIEBUS-HOWTO.txt to reST") Signed-off-by: Bjorn Helgaas --- Documentation/PCI/index.rst | 2 +- Documentation/PCI/pciebus-howto.rst | 220 ++++++++++++++++++++++++++++++++++++ Documentation/PCI/picebus-howto.rst | 220 ------------------------------------ 3 files changed, 221 insertions(+), 221 deletions(-) create mode 100644 Documentation/PCI/pciebus-howto.rst delete mode 100644 Documentation/PCI/picebus-howto.rst diff --git a/Documentation/PCI/index.rst b/Documentation/PCI/index.rst index f4c6121868c3..6768305e4c26 100644 --- a/Documentation/PCI/index.rst +++ b/Documentation/PCI/index.rst @@ -9,7 +9,7 @@ Linux PCI Bus Subsystem :numbered: pci - picebus-howto + pciebus-howto pci-iov-howto msi-howto acpi-info diff --git a/Documentation/PCI/pciebus-howto.rst b/Documentation/PCI/pciebus-howto.rst new file mode 100644 index 000000000000..f882ff62c51f --- /dev/null +++ b/Documentation/PCI/pciebus-howto.rst @@ -0,0 +1,220 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: + +=========================================== +The PCI Express Port Bus Driver Guide HOWTO +=========================================== + +:Author: Tom L Nguyen tom.l.nguyen@intel.com 11/03/2004 +:Copyright: |copy| 2004 Intel Corporation + +About this guide +================ + +This guide describes the basics of the PCI Express Port Bus driver +and provides information on how to enable the service drivers to +register/unregister with the PCI Express Port Bus Driver. + + +What is the PCI Express Port Bus Driver +======================================= + +A PCI Express Port is a logical PCI-PCI Bridge structure. There +are two types of PCI Express Port: the Root Port and the Switch +Port. The Root Port originates a PCI Express link from a PCI Express +Root Complex and the Switch Port connects PCI Express links to +internal logical PCI buses. 
The Switch Port, which has its secondary +bus representing the switch's internal routing logic, is called the +switch's Upstream Port. The switch's Downstream Port is bridging from +switch's internal routing bus to a bus representing the downstream +PCI Express link from the PCI Express Switch. + +A PCI Express Port can provide up to four distinct functions, +referred to in this document as services, depending on its port type. +PCI Express Port's services include native hotplug support (HP), +power management event support (PME), advanced error reporting +support (AER), and virtual channel support (VC). These services may +be handled by a single complex driver or be individually distributed +and handled by corresponding service drivers. + +Why use the PCI Express Port Bus Driver? +======================================== + +In existing Linux kernels, the Linux Device Driver Model allows a +physical device to be handled by only a single driver. The PCI +Express Port is a PCI-PCI Bridge device with multiple distinct +services. To maintain a clean and simple solution each service +may have its own software service driver. In this case several +service drivers will compete for a single PCI-PCI Bridge device. +For example, if the PCI Express Root Port native hotplug service +driver is loaded first, it claims a PCI-PCI Bridge Root Port. The +kernel therefore does not load other service drivers for that Root +Port. In other words, it is impossible to have multiple service +drivers load and run on a PCI-PCI Bridge device simultaneously +using the current driver model. + +To enable multiple service drivers running simultaneously requires +having a PCI Express Port Bus driver, which manages all populated +PCI Express Ports and distributes all provided service requests +to the corresponding service drivers as required. 
Some key +advantages of using the PCI Express Port Bus driver are listed below: + + - Allow multiple service drivers to run simultaneously on + a PCI-PCI Bridge Port device. + + - Allow service drivers implemented in an independent + staged approach. + + - Allow one service driver to run on multiple PCI-PCI Bridge + Port devices. + + - Manage and distribute resources of a PCI-PCI Bridge Port + device to requested service drivers. + +Configuring the PCI Express Port Bus Driver vs. Service Drivers +=============================================================== + +Including the PCI Express Port Bus Driver Support into the Kernel +----------------------------------------------------------------- + +Including the PCI Express Port Bus driver depends on whether the PCI +Express support is included in the kernel config. The kernel will +automatically include the PCI Express Port Bus driver as a kernel +driver when the PCI Express support is enabled in the kernel. + +Enabling Service Driver Support +------------------------------- + +PCI device drivers are implemented based on Linux Device Driver Model. +All service drivers are PCI device drivers. As discussed above, it is +impossible to load any service driver once the kernel has loaded the +PCI Express Port Bus Driver. To meet the PCI Express Port Bus Driver +Model requires some minimal changes on existing service drivers that +imposes no impact on the functionality of existing service drivers. + +A service driver is required to use the two APIs shown below to +register its service with the PCI Express Port Bus driver (see +section 5.2.1 & 5.2.2). It is important that a service driver +initializes the pcie_port_service_driver data structure, included in +header file /include/linux/pcieport_if.h, before calling these APIs. +Failure to do so will result an identity mismatch, which prevents +the PCI Express Port Bus driver from loading a service driver. 
+ +pcie_port_service_register +~~~~~~~~~~~~~~~~~~~~~~~~~~ +:: + + int pcie_port_service_register(struct pcie_port_service_driver *new) + +This API replaces the Linux Driver Model's pci_register_driver API. A +service driver should always calls pcie_port_service_register at +module init. Note that after service driver being loaded, calls +such as pci_enable_device(dev) and pci_set_master(dev) are no longer +necessary since these calls are executed by the PCI Port Bus driver. + +pcie_port_service_unregister +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +:: + + void pcie_port_service_unregister(struct pcie_port_service_driver *new) + +pcie_port_service_unregister replaces the Linux Driver Model's +pci_unregister_driver. It's always called by service driver when a +module exits. + +Sample Code +~~~~~~~~~~~ + +Below is sample service driver code to initialize the port service +driver data structure. +:: + + static struct pcie_port_service_id service_id[] = { { + .vendor = PCI_ANY_ID, + .device = PCI_ANY_ID, + .port_type = PCIE_RC_PORT, + .service_type = PCIE_PORT_SERVICE_AER, + }, { /* end: all zeroes */ } + }; + + static struct pcie_port_service_driver root_aerdrv = { + .name = (char *)device_name, + .id_table = &service_id[0], + + .probe = aerdrv_load, + .remove = aerdrv_unload, + + .suspend = aerdrv_suspend, + .resume = aerdrv_resume, + }; + +Below is a sample code for registering/unregistering a service +driver. 
+:: + + static int __init aerdrv_service_init(void) + { + int retval = 0; + + retval = pcie_port_service_register(&root_aerdrv); + if (!retval) { + /* + * FIX ME + */ + } + return retval; + } + + static void __exit aerdrv_service_exit(void) + { + pcie_port_service_unregister(&root_aerdrv); + } + + module_init(aerdrv_service_init); + module_exit(aerdrv_service_exit); + +Possible Resource Conflicts +=========================== + +Since all service drivers of a PCI-PCI Bridge Port device are +allowed to run simultaneously, below lists a few of possible resource +conflicts with proposed solutions. + +MSI and MSI-X Vector Resource +----------------------------- + +Once MSI or MSI-X interrupts are enabled on a device, it stays in this +mode until they are disabled again. Since service drivers of the same +PCI-PCI Bridge port share the same physical device, if an individual +service driver enables or disables MSI/MSI-X mode it may result +unpredictable behavior. + +To avoid this situation all service drivers are not permitted to +switch interrupt mode on its device. The PCI Express Port Bus driver +is responsible for determining the interrupt mode and this should be +transparent to service drivers. Service drivers need to know only +the vector IRQ assigned to the field irq of struct pcie_device, which +is passed in when the PCI Express Port Bus driver probes each service +driver. Service drivers should use (struct pcie_device*)dev->irq to +call request_irq/free_irq. In addition, the interrupt mode is stored +in the field interrupt_mode of struct pcie_device. + +PCI Memory/IO Mapped Regions +---------------------------- + +Service drivers for PCI Express Power Management (PME), Advanced +Error Reporting (AER), Hot-Plug (HP) and Virtual Channel (VC) access +PCI configuration space on the PCI Express port. In all cases the +registers accessed are independent of each other. 
This patch assumes +that all service drivers will be well behaved and not overwrite +other service driver's configuration settings. + +PCI Config Registers +-------------------- + +Each service driver runs its PCI config operations on its own +capability structure except the PCI Express capability structure, in +which Root Control register and Device Control register are shared +between PME and AER. This patch assumes that all service drivers +will be well behaved and not overwrite other service driver's +configuration settings. diff --git a/Documentation/PCI/picebus-howto.rst b/Documentation/PCI/picebus-howto.rst deleted file mode 100644 index f882ff62c51f..000000000000 --- a/Documentation/PCI/picebus-howto.rst +++ /dev/null @@ -1,220 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 -.. include:: - -=========================================== -The PCI Express Port Bus Driver Guide HOWTO -=========================================== - -:Author: Tom L Nguyen tom.l.nguyen@intel.com 11/03/2004 -:Copyright: |copy| 2004 Intel Corporation - -About this guide -================ - -This guide describes the basics of the PCI Express Port Bus driver -and provides information on how to enable the service drivers to -register/unregister with the PCI Express Port Bus Driver. - - -What is the PCI Express Port Bus Driver -======================================= - -A PCI Express Port is a logical PCI-PCI Bridge structure. There -are two types of PCI Express Port: the Root Port and the Switch -Port. The Root Port originates a PCI Express link from a PCI Express -Root Complex and the Switch Port connects PCI Express links to -internal logical PCI buses. The Switch Port, which has its secondary -bus representing the switch's internal routing logic, is called the -switch's Upstream Port. The switch's Downstream Port is bridging from -switch's internal routing bus to a bus representing the downstream -PCI Express link from the PCI Express Switch. 
- -A PCI Express Port can provide up to four distinct functions, -referred to in this document as services, depending on its port type. -PCI Express Port's services include native hotplug support (HP), -power management event support (PME), advanced error reporting -support (AER), and virtual channel support (VC). These services may -be handled by a single complex driver or be individually distributed -and handled by corresponding service drivers. - -Why use the PCI Express Port Bus Driver? -======================================== - -In existing Linux kernels, the Linux Device Driver Model allows a -physical device to be handled by only a single driver. The PCI -Express Port is a PCI-PCI Bridge device with multiple distinct -services. To maintain a clean and simple solution each service -may have its own software service driver. In this case several -service drivers will compete for a single PCI-PCI Bridge device. -For example, if the PCI Express Root Port native hotplug service -driver is loaded first, it claims a PCI-PCI Bridge Root Port. The -kernel therefore does not load other service drivers for that Root -Port. In other words, it is impossible to have multiple service -drivers load and run on a PCI-PCI Bridge device simultaneously -using the current driver model. - -To enable multiple service drivers running simultaneously requires -having a PCI Express Port Bus driver, which manages all populated -PCI Express Ports and distributes all provided service requests -to the corresponding service drivers as required. Some key -advantages of using the PCI Express Port Bus driver are listed below: - - - Allow multiple service drivers to run simultaneously on - a PCI-PCI Bridge Port device. - - - Allow service drivers implemented in an independent - staged approach. - - - Allow one service driver to run on multiple PCI-PCI Bridge - Port devices. - - - Manage and distribute resources of a PCI-PCI Bridge Port - device to requested service drivers. 
- -Configuring the PCI Express Port Bus Driver vs. Service Drivers -=============================================================== - -Including the PCI Express Port Bus Driver Support into the Kernel ------------------------------------------------------------------ - -Including the PCI Express Port Bus driver depends on whether the PCI -Express support is included in the kernel config. The kernel will -automatically include the PCI Express Port Bus driver as a kernel -driver when the PCI Express support is enabled in the kernel. - -Enabling Service Driver Support -------------------------------- - -PCI device drivers are implemented based on Linux Device Driver Model. -All service drivers are PCI device drivers. As discussed above, it is -impossible to load any service driver once the kernel has loaded the -PCI Express Port Bus Driver. To meet the PCI Express Port Bus Driver -Model requires some minimal changes on existing service drivers that -imposes no impact on the functionality of existing service drivers. - -A service driver is required to use the two APIs shown below to -register its service with the PCI Express Port Bus driver (see -section 5.2.1 & 5.2.2). It is important that a service driver -initializes the pcie_port_service_driver data structure, included in -header file /include/linux/pcieport_if.h, before calling these APIs. -Failure to do so will result an identity mismatch, which prevents -the PCI Express Port Bus driver from loading a service driver. - -pcie_port_service_register -~~~~~~~~~~~~~~~~~~~~~~~~~~ -:: - - int pcie_port_service_register(struct pcie_port_service_driver *new) - -This API replaces the Linux Driver Model's pci_register_driver API. A -service driver should always calls pcie_port_service_register at -module init. Note that after service driver being loaded, calls -such as pci_enable_device(dev) and pci_set_master(dev) are no longer -necessary since these calls are executed by the PCI Port Bus driver. 
- -pcie_port_service_unregister -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -:: - - void pcie_port_service_unregister(struct pcie_port_service_driver *new) - -pcie_port_service_unregister replaces the Linux Driver Model's -pci_unregister_driver. It's always called by service driver when a -module exits. - -Sample Code -~~~~~~~~~~~ - -Below is sample service driver code to initialize the port service -driver data structure. -:: - - static struct pcie_port_service_id service_id[] = { { - .vendor = PCI_ANY_ID, - .device = PCI_ANY_ID, - .port_type = PCIE_RC_PORT, - .service_type = PCIE_PORT_SERVICE_AER, - }, { /* end: all zeroes */ } - }; - - static struct pcie_port_service_driver root_aerdrv = { - .name = (char *)device_name, - .id_table = &service_id[0], - - .probe = aerdrv_load, - .remove = aerdrv_unload, - - .suspend = aerdrv_suspend, - .resume = aerdrv_resume, - }; - -Below is a sample code for registering/unregistering a service -driver. -:: - - static int __init aerdrv_service_init(void) - { - int retval = 0; - - retval = pcie_port_service_register(&root_aerdrv); - if (!retval) { - /* - * FIX ME - */ - } - return retval; - } - - static void __exit aerdrv_service_exit(void) - { - pcie_port_service_unregister(&root_aerdrv); - } - - module_init(aerdrv_service_init); - module_exit(aerdrv_service_exit); - -Possible Resource Conflicts -=========================== - -Since all service drivers of a PCI-PCI Bridge Port device are -allowed to run simultaneously, below lists a few of possible resource -conflicts with proposed solutions. - -MSI and MSI-X Vector Resource ------------------------------ - -Once MSI or MSI-X interrupts are enabled on a device, it stays in this -mode until they are disabled again. Since service drivers of the same -PCI-PCI Bridge port share the same physical device, if an individual -service driver enables or disables MSI/MSI-X mode it may result -unpredictable behavior. 
- -To avoid this situation all service drivers are not permitted to -switch interrupt mode on its device. The PCI Express Port Bus driver -is responsible for determining the interrupt mode and this should be -transparent to service drivers. Service drivers need to know only -the vector IRQ assigned to the field irq of struct pcie_device, which -is passed in when the PCI Express Port Bus driver probes each service -driver. Service drivers should use (struct pcie_device*)dev->irq to -call request_irq/free_irq. In addition, the interrupt mode is stored -in the field interrupt_mode of struct pcie_device. - -PCI Memory/IO Mapped Regions ----------------------------- - -Service drivers for PCI Express Power Management (PME), Advanced -Error Reporting (AER), Hot-Plug (HP) and Virtual Channel (VC) access -PCI configuration space on the PCI Express port. In all cases the -registers accessed are independent of each other. This patch assumes -that all service drivers will be well behaved and not overwrite -other service driver's configuration settings. - -PCI Config Registers --------------------- - -Each service driver runs its PCI config operations on its own -capability structure except the PCI Express capability structure, in -which Root Control register and Device Control register are shared -between PME and AER. This patch assumes that all service drivers -will be well behaved and not overwrite other service driver's -configuration settings. -- cgit v1.2.1 From 2aafdf5a5786ebbd8ccfe132ed6267c6962c5c3c Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Thu, 15 Aug 2019 09:58:26 +0200 Subject: selftests: net: tcp_fastopen_backup_key.sh: fix shellcheck issue When running tcp_fastopen_backup_key.sh the following issue was seen in a busybox environment. ./tcp_fastopen_backup_key.sh: line 33: [: -ne: unary operator expected Shellcheck showed the following issue. 
$ shellcheck tools/testing/selftests/net/tcp_fastopen_backup_key.sh In tools/testing/selftests/net/tcp_fastopen_backup_key.sh line 33: if [ $val -ne 0 ]; then ^-- SC2086: Double quote to prevent globbing and word splitting. Rework to do a string comparison instead. Signed-off-by: Anders Roxell Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_fastopen_backup_key.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.sh b/tools/testing/selftests/net/tcp_fastopen_backup_key.sh index 41476399e184..f6e65674b83c 100755 --- a/tools/testing/selftests/net/tcp_fastopen_backup_key.sh +++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.sh @@ -30,7 +30,7 @@ do_test() { ip netns exec "${NETNS}" ./tcp_fastopen_backup_key "$1" val=$(ip netns exec "${NETNS}" nstat -az | \ grep TcpExtTCPFastOpenPassiveFail | awk '{print $2}') - if [ $val -ne 0 ]; then + if [ "$val" != 0 ]; then echo "FAIL: TcpExtTCPFastOpenPassiveFail non-zero" return 1 fi -- cgit v1.2.1 From e0d57d9c7e7a223f3c2ff8b7b63ec1bf63f11ed4 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Fri, 2 Aug 2019 14:42:14 +0300 Subject: net/mlx5e: Fix a race with XSKICOSQ in XSK wakeup flow Add a missing spinlock around XSKICOSQ usage at the activation stage, because there is a race between a configuration change and the application calling sendto(). 
Fixes: db05815b36cb ("net/mlx5e: Add XSK zero-copy support") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index aaffa6f68dc0..7f78c004d12f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -143,7 +143,10 @@ void mlx5e_activate_xsk(struct mlx5e_channel *c) { set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); /* TX queue is created active. */ + + spin_lock(&c->xskicosq_lock); mlx5e_trigger_irq(&c->xskicosq); + spin_unlock(&c->xskicosq_lock); } void mlx5e_deactivate_xsk(struct mlx5e_channel *c) -- cgit v1.2.1 From f43d48d10a42787c1de1d3facd7db210c91db1da Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 1 Aug 2019 14:27:30 +0300 Subject: net/mlx5e: Fix compatibility issue with ethtool flash device Cited patch deleted ethtool flash device support, as ethtool core can fall back to devlink flash callback. However, this is supported only if there is a devlink port registered over the corresponding netdevice. As mlx5e does not have devlink port support over native netdevice, it broke the ability to flash device via ethtool. This patch re-adds the ethtool callback to avoid user functionality breakage when trying to flash device via ethtool. 
Fixes: 9c8bca2637b8 ("mlx5: Move firmware flash implementation to devlink") Signed-off-by: Eran Ben Elisha Acked-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 ++ .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 35 ++++++++++++++++++++++ .../ethernet/mellanox/mlx5/core/ipoib/ethtool.c | 9 ++++++ 3 files changed, 46 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f6b64a03cd06..65bec19a438f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1105,6 +1105,8 @@ u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv); u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info); +int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash); void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, struct ethtool_pauseparam *pauseparam); int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index e89dba790a2d..20e628c907e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1690,6 +1690,40 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev, return 0; } +int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct net_device *dev = priv->netdev; + const struct firmware *fw; + int err; + + if (flash->region != ETHTOOL_FLASH_ALL_REGIONS) + return -EOPNOTSUPP; + + err = request_firmware_direct(&fw, flash->data, &dev->dev); + if (err) + return err; + + dev_hold(dev); + rtnl_unlock(); + + err = mlx5_firmware_flash(mdev, fw, NULL); + release_firmware(fw); + + rtnl_lock(); + 
dev_put(dev); + return err; +} + +static int mlx5e_flash_device(struct net_device *dev, + struct ethtool_flash *flash) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_flash_device(priv, flash); +} + static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, bool is_rx_cq) { @@ -1972,6 +2006,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .set_wol = mlx5e_set_wol, .get_module_info = mlx5e_get_module_info, .get_module_eeprom = mlx5e_get_module_eeprom, + .flash_device = mlx5e_flash_device, .get_priv_flags = mlx5e_get_priv_flags, .set_priv_flags = mlx5e_set_priv_flags, .self_test = mlx5e_self_test, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index ebd81f6b556e..90cb50fe17fd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -122,6 +122,14 @@ static int mlx5i_get_ts_info(struct net_device *netdev, return mlx5e_ethtool_get_ts_info(priv, info); } +static int mlx5i_flash_device(struct net_device *netdev, + struct ethtool_flash *flash) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_flash_device(priv, flash); +} + enum mlx5_ptys_width { MLX5_PTYS_WIDTH_1X = 1 << 0, MLX5_PTYS_WIDTH_2X = 1 << 1, @@ -233,6 +241,7 @@ const struct ethtool_ops mlx5i_ethtool_ops = { .get_ethtool_stats = mlx5i_get_ethtool_stats, .get_ringparam = mlx5i_get_ringparam, .set_ringparam = mlx5i_set_ringparam, + .flash_device = mlx5i_flash_device, .get_channels = mlx5i_get_channels, .set_channels = mlx5i_set_channels, .get_coalesce = mlx5i_get_coalesce, -- cgit v1.2.1 From d6846bfbeeac873d85f32bd2b988fa94c89dbcb8 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 12 Aug 2019 11:16:11 +0900 Subject: nfsd: fix dentry leak upon mkdir failure. 
syzbot is reporting that nfsd_mkdir() forgot to remove dentry created by d_alloc_name() when __nfsd_mkdir() failed (due to memory allocation fault injection) [1]. [1] https://syzkaller.appspot.com/bug?id=ce41a1f769ea4637ebffedf004a803e8405b4674 Signed-off-by: Tetsuo Handa Reported-by: syzbot Fixes: e8a79fb14f6b76b5 ("nfsd: add nfsd/clients directory") [bfields: clean up in nfsd_mkdir instead of __nfsd_mkdir] Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 13c548733860..928a0b2c05dc 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1205,6 +1205,7 @@ out: inode_unlock(dir); return dentry; out_err: + dput(dentry); dentry = ERR_PTR(ret); goto out; } -- cgit v1.2.1 From dc46bba709cfb45e4b2d40cf45aaeacb82690504 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 14 Aug 2019 21:57:37 -0400 Subject: nfsd: use i_wrlock instead of rcu for nfsdfs i_private synchronize_rcu() gets called multiple times each time a client is destroyed. If the laundromat thread has a lot of clients to destroy, the delay can be noticeable. This was causing pynfs test RENEW3 to fail. We could embed an rcu_head in each inode and do the kref_put in an rcu callback. But simplest is just to take a lock here. (I also wonder if the laundromat thread would be better replaced by a bunch of scheduled work or timers or something.) Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfsctl.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 928a0b2c05dc..b14f825c62fe 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1215,11 +1215,9 @@ static void clear_ncl(struct inode *inode) struct nfsdfs_client *ncl = inode->i_private; inode->i_private = NULL; - synchronize_rcu(); kref_put(&ncl->cl_ref, ncl->cl_release); } - static struct nfsdfs_client *__get_nfsdfs_client(struct inode *inode) { struct nfsdfs_client *nc = inode->i_private; @@ -1233,9 +1231,9 @@ struct nfsdfs_client *get_nfsdfs_client(struct inode *inode) { struct nfsdfs_client *nc; - rcu_read_lock(); + inode_lock_shared(inode); nc = __get_nfsdfs_client(inode); - rcu_read_unlock(); + inode_unlock_shared(inode); return nc; } /* from __rpc_unlink */ -- cgit v1.2.1 From 6f967f8b1be7001b31c46429f2ee7d275af2190f Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Wed, 14 Aug 2019 00:14:49 -0500 Subject: liquidio: add cleanup in octeon_setup_iq() If oct->fn_list.enable_io_queues() fails, no cleanup is executed, leading to memory/resource leaks. To fix this issue, invoke octeon_delete_instr_queue() before returning from the function. Signed-off-by: Wenwen Wang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cavium/liquidio/request_manager.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index 032224178b64..6dd65f9b347c 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -237,8 +237,10 @@ int octeon_setup_iq(struct octeon_device *oct, } oct->num_iqs++; - if (oct->fn_list.enable_io_queues(oct)) + if (oct->fn_list.enable_io_queues(oct)) { + octeon_delete_instr_queue(oct, iq_no); return 1; + } return 0; } -- cgit v1.2.1 From d85f01775850a35eae47a0090839baf510c1ef12 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 14 Aug 2019 05:31:54 +0000 Subject: net: tls, fix sk_write_space NULL write when tx disabled The ctx->sk_write_space pointer is only set when TLS tx mode is enabled. When running without TX mode it's a null pointer, but we still set the sk sk_write_space pointer on close(). Fix the close path to only overwrite sk->sk_write_space when the current pointer is to the tls_write_space function indicating the tls module should clean it up properly as well. Reported-by: Hillf Danton Cc: Ying Xue Cc: Andrey Konovalov Fixes: 57c722e932cfb ("net/tls: swap sk_write_space on close") Signed-off-by: John Fastabend Reviewed-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- net/tls/tls_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index ce6ef56a65ef..43252a801c3f 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -308,7 +308,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) if (free_ctx) icsk->icsk_ulp_data = NULL; sk->sk_prot = ctx->sk_proto; - sk->sk_write_space = ctx->sk_write_space; + if (sk->sk_write_space == tls_write_space) + sk->sk_write_space = ctx->sk_write_space; write_unlock_bh(&sk->sk_callback_lock); release_sock(sk); if (ctx->tx_conf == TLS_SW) -- cgit v1.2.1 From 20fb7c7a39b5c719e2e619673b5f5729ee7d2306 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Wed, 14 Aug 2019 01:38:39 -0500 Subject: net: myri10ge: fix memory leaks In myri10ge_probe(), myri10ge_alloc_slices() is invoked to allocate slices related structures. Later on, myri10ge_request_irq() is used to get an irq. However, if this process fails, the allocated slices related structures are not deallocated, leading to memory leaks. To fix this issue, revise the target label of the goto statement to 'abort_with_slices'. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index d8b7fba96d58..337b0cbfd153 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -3919,7 +3919,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * setup (if available). 
*/ status = myri10ge_request_irq(mgp); if (status != 0) - goto abort_with_firmware; + goto abort_with_slices; myri10ge_free_irq(mgp); /* Save configuration space to be restored if the -- cgit v1.2.1 From cf3591ef832915892f2499b7e54b51d4c578b28c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 8 Aug 2019 05:40:04 -0400 Subject: Revert "dm bufio: fix deadlock with loop device" Revert the commit bd293d071ffe65e645b4d8104f9d8fe15ea13862. The proper fix has been made available with commit d0a255e795ab ("loop: set PF_MEMALLOC_NOIO for the worker thread"). Note that the fix offered by commit bd293d071ffe doesn't really prevent the deadlock from occuring - if we look at the stacktrace reported by Junxiao Bi, we see that it hangs in bit_wait_io and not on the mutex - i.e. it has already successfully taken the mutex. Changing the mutex from mutex_lock to mutex_trylock won't help with deadlocks that happen afterwards. PID: 474 TASK: ffff8813e11f4600 CPU: 10 COMMAND: "kswapd0" #0 [ffff8813dedfb938] __schedule at ffffffff8173f405 #1 [ffff8813dedfb990] schedule at ffffffff8173fa27 #2 [ffff8813dedfb9b0] schedule_timeout at ffffffff81742fec #3 [ffff8813dedfba60] io_schedule_timeout at ffffffff8173f186 #4 [ffff8813dedfbaa0] bit_wait_io at ffffffff8174034f #5 [ffff8813dedfbac0] __wait_on_bit at ffffffff8173fec8 #6 [ffff8813dedfbb10] out_of_line_wait_on_bit at ffffffff8173ff81 #7 [ffff8813dedfbb90] __make_buffer_clean at ffffffffa038736f [dm_bufio] #8 [ffff8813dedfbbb0] __try_evict_buffer at ffffffffa0387bb8 [dm_bufio] #9 [ffff8813dedfbbd0] dm_bufio_shrink_scan at ffffffffa0387cc3 [dm_bufio] #10 [ffff8813dedfbc40] shrink_slab at ffffffff811a87ce #11 [ffff8813dedfbd30] shrink_zone at ffffffff811ad778 #12 [ffff8813dedfbdc0] kswapd at ffffffff811ae92f #13 [ffff8813dedfbec0] kthread at ffffffff810a8428 #14 [ffff8813dedfbf50] ret_from_fork at ffffffff81745242 Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Fixes: bd293d071ffe ("dm bufio: fix deadlock with loop device") 
Depends-on: d0a255e795ab ("loop: set PF_MEMALLOC_NOIO for the worker thread") Signed-off-by: Mike Snitzer --- drivers/md/dm-bufio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index b6b5acc92ca2..2a48ea3f1b30 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1599,7 +1599,9 @@ dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) unsigned long freed; c = container_of(shrink, struct dm_bufio_client, shrinker); - if (!dm_bufio_trylock(c)) + if (sc->gfp_mask & __GFP_FS) + dm_bufio_lock(c); + else if (!dm_bufio_trylock(c)) return SHRINK_STOP; freed = __scan(c, sc->nr_to_scan, sc->gfp_mask); -- cgit v1.2.1 From d1fef41465f0e8cae0693fb184caa6bfafb6cd16 Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Mon, 5 Aug 2019 16:56:03 -0700 Subject: dm kcopyd: always complete failed jobs This patch fixes a problem in dm-kcopyd that may leave jobs in complete queue indefinitely in the event of backing storage failure. This behavior has been observed while running 100% write file fio workload against an XFS volume created on top of a dm-zoned target device. If the underlying storage of dm-zoned goes to offline state under I/O, kcopyd sometimes never issues the end copy callback and dm-zoned reclaim work hangs indefinitely waiting for that completion. This behavior was traced down to the error handling code in process_jobs() function that places the failed job to complete_jobs queue, but doesn't wake up the job handler. In case of backing device failure, all outstanding jobs may end up going to complete_jobs queue via this code path and then stay there forever because there are no more successful I/O jobs to wake up the job handler. This patch adds a wake() call to always wake up kcopyd job wait queue for all I/O jobs that fail before dm_io() gets called for that job. 
The patch also sets the write error status in all sub jobs that are failed because their master job has failed. Fixes: b73c67c2cbb00 ("dm kcopyd: add sequential write feature") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Fomichev Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-kcopyd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index df2011de7be2..1bbe4a34ef4c 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -566,8 +566,10 @@ static int run_io_job(struct kcopyd_job *job) * no point in continuing. */ if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) && - job->master_job->write_err) + job->master_job->write_err) { + job->write_err = job->master_job->write_err; return -EIO; + } io_job_start(job->kc->throttle); @@ -619,6 +621,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, else job->read_err = 1; push(&kc->complete_jobs, job); + wake(kc); break; } -- cgit v1.2.1 From b234c6d7a703661b5045c5bf569b7c99d2edbf88 Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Sat, 10 Aug 2019 14:43:09 -0700 Subject: dm zoned: improve error handling in reclaim There are several places in reclaim code where errors are not propagated to the main function, dmz_reclaim(). This function is responsible for unlocking zones that might be still locked at the end of any failed reclaim iterations. As the result, some device zones may be left permanently locked for reclaim, degrading target's capability to reclaim zones. This patch fixes these issues as follows - Make sure that dmz_reclaim_buf(), dmz_reclaim_seq_data() and dmz_reclaim_rnd_data() return error codes to the caller. dmz_reclaim() function is renamed to dmz_do_reclaim() to avoid clashing with "struct dmz_reclaim" and is modified to return the error to the caller. dmz_get_zone_for_reclaim() now returns an error instead of NULL pointer and reclaim code checks for that error. 
Error logging/debug messages are added where necessary. Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Fomichev Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 4 ++-- drivers/md/dm-zoned-reclaim.c | 28 +++++++++++++++++++--------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 8545dcee9fd0..935d9be5af39 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1542,7 +1542,7 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd) struct dm_zone *zone; if (list_empty(&zmd->map_rnd_list)) - return NULL; + return ERR_PTR(-EBUSY); list_for_each_entry(zone, &zmd->map_rnd_list, link) { if (dmz_is_buf(zone)) @@ -1553,7 +1553,7 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd) return dzone; } - return NULL; + return ERR_PTR(-EBUSY); } /* diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c index edf4b95eb075..e381354dc136 100644 --- a/drivers/md/dm-zoned-reclaim.c +++ b/drivers/md/dm-zoned-reclaim.c @@ -215,7 +215,7 @@ static int dmz_reclaim_buf(struct dmz_reclaim *zrc, struct dm_zone *dzone) dmz_unlock_flush(zmd); - return 0; + return ret; } /* @@ -259,7 +259,7 @@ static int dmz_reclaim_seq_data(struct dmz_reclaim *zrc, struct dm_zone *dzone) dmz_unlock_flush(zmd); - return 0; + return ret; } /* @@ -312,7 +312,7 @@ static int dmz_reclaim_rnd_data(struct dmz_reclaim *zrc, struct dm_zone *dzone) dmz_unlock_flush(zmd); - return 0; + return ret; } /* @@ -334,7 +334,7 @@ static void dmz_reclaim_empty(struct dmz_reclaim *zrc, struct dm_zone *dzone) /* * Find a candidate zone for reclaim and process it. 
*/ -static void dmz_reclaim(struct dmz_reclaim *zrc) +static int dmz_do_reclaim(struct dmz_reclaim *zrc) { struct dmz_metadata *zmd = zrc->metadata; struct dm_zone *dzone; @@ -344,8 +344,8 @@ static void dmz_reclaim(struct dmz_reclaim *zrc) /* Get a data zone */ dzone = dmz_get_zone_for_reclaim(zmd); - if (!dzone) - return; + if (IS_ERR(dzone)) + return PTR_ERR(dzone); start = jiffies; @@ -391,13 +391,20 @@ static void dmz_reclaim(struct dmz_reclaim *zrc) out: if (ret) { dmz_unlock_zone_reclaim(dzone); - return; + return ret; } - (void) dmz_flush_metadata(zrc->metadata); + ret = dmz_flush_metadata(zrc->metadata); + if (ret) { + dmz_dev_debug(zrc->dev, + "Metadata flush for zone %u failed, err %d\n", + dmz_id(zmd, rzone), ret); + return ret; + } dmz_dev_debug(zrc->dev, "Reclaimed zone %u in %u ms", dmz_id(zmd, rzone), jiffies_to_msecs(jiffies - start)); + return 0; } /* @@ -442,6 +449,7 @@ static void dmz_reclaim_work(struct work_struct *work) struct dmz_metadata *zmd = zrc->metadata; unsigned int nr_rnd, nr_unmap_rnd; unsigned int p_unmap_rnd; + int ret; if (!dmz_should_reclaim(zrc)) { mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD); @@ -471,7 +479,9 @@ static void dmz_reclaim_work(struct work_struct *work) (dmz_target_idle(zrc) ? "Idle" : "Busy"), p_unmap_rnd, nr_unmap_rnd, nr_rnd); - dmz_reclaim(zrc); + ret = dmz_do_reclaim(zrc); + if (ret) + dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret); dmz_schedule_reclaim(zrc); } -- cgit v1.2.1 From d7428c50118e739e672656c28d2b26b09375d4e0 Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Sat, 10 Aug 2019 14:43:10 -0700 Subject: dm zoned: improve error handling in i/o map code Some errors are ignored in the I/O path during queueing chunks for processing by chunk works. Since at least these errors are transient in nature, it should be possible to retry the failed incoming commands. 
The fix - Errors that can happen while queueing chunks are carried upwards to the main mapping function and it now returns DM_MAPIO_REQUEUE for any incoming requests that can not be properly queued. Error logging/debug messages are added where needed. Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Fomichev Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-target.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 51d029bbb740..944db71ed3d7 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -513,22 +513,24 @@ static void dmz_flush_work(struct work_struct *work) * Get a chunk work and start it to process a new BIO. * If the BIO chunk has no work yet, create one. */ -static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) +static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) { unsigned int chunk = dmz_bio_chunk(dmz->dev, bio); struct dm_chunk_work *cw; + int ret = 0; mutex_lock(&dmz->chunk_lock); /* Get the BIO chunk work. 
If one is not active yet, create one */ cw = radix_tree_lookup(&dmz->chunk_rxtree, chunk); if (!cw) { - int ret; /* Create a new chunk work */ cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOIO); - if (!cw) + if (unlikely(!cw)) { + ret = -ENOMEM; goto out; + } INIT_WORK(&cw->work, dmz_chunk_work); refcount_set(&cw->refcount, 0); @@ -539,7 +541,6 @@ static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) ret = radix_tree_insert(&dmz->chunk_rxtree, chunk, cw); if (unlikely(ret)) { kfree(cw); - cw = NULL; goto out; } } @@ -547,10 +548,12 @@ static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) bio_list_add(&cw->bio_list, bio); dmz_get_chunk_work(cw); + dmz_reclaim_bio_acc(dmz->reclaim); if (queue_work(dmz->chunk_wq, &cw->work)) dmz_get_chunk_work(cw); out: mutex_unlock(&dmz->chunk_lock); + return ret; } /* @@ -564,6 +567,7 @@ static int dmz_map(struct dm_target *ti, struct bio *bio) sector_t sector = bio->bi_iter.bi_sector; unsigned int nr_sectors = bio_sectors(bio); sector_t chunk_sector; + int ret; dmz_dev_debug(dev, "BIO op %d sector %llu + %u => chunk %llu, block %llu, %u blocks", bio_op(bio), (unsigned long long)sector, nr_sectors, @@ -601,8 +605,14 @@ static int dmz_map(struct dm_target *ti, struct bio *bio) dm_accept_partial_bio(bio, dev->zone_nr_sectors - chunk_sector); /* Now ready to handle this BIO */ - dmz_reclaim_bio_acc(dmz->reclaim); - dmz_queue_chunk_work(dmz, bio); + ret = dmz_queue_chunk_work(dmz, bio); + if (ret) { + dmz_dev_debug(dmz->dev, + "BIO op %d, can't process chunk %llu, err %i\n", + bio_op(bio), (u64)dmz_bio_chunk(dmz->dev, bio), + ret); + return DM_MAPIO_REQUEUE; + } return DM_MAPIO_SUBMITTED; } -- cgit v1.2.1 From 75d66ffb48efb30f2dd42f041ba8b39c5b2bd115 Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Sat, 10 Aug 2019 14:43:11 -0700 Subject: dm zoned: properly handle backing device failure dm-zoned is observed to lock up or livelock in case of hardware failure or some misconfiguration of the 
backing zoned device. This patch adds a new dm-zoned target function that checks the status of the backing device. If the request queue of the backing device is found to be in dying state or the SCSI backing device enters offline state, the health check code sets a dm-zoned target flag prompting all further incoming I/O to be rejected. In order to detect backing device failures timely, this new function is called in the request mapping path, at the beginning of every reclaim run and before performing any metadata I/O. The proper way out of this situation is to do dmsetup remove and recreate the target when the problem with the backing device is resolved. Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Fomichev Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 51 +++++++++++++++++++++++++++++++++--------- drivers/md/dm-zoned-reclaim.c | 18 +++++++++++++-- drivers/md/dm-zoned-target.c | 45 +++++++++++++++++++++++++++++++++++-- drivers/md/dm-zoned.h | 10 +++++++++ 4 files changed, 110 insertions(+), 14 deletions(-) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 935d9be5af39..2882897aece2 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -402,15 +402,18 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd, sector_t block = zmd->sb[zmd->mblk_primary].block + mblk_no; struct bio *bio; + if (dmz_bdev_is_dying(zmd->dev)) + return ERR_PTR(-EIO); + /* Get a new block and a BIO to read it */ mblk = dmz_alloc_mblock(zmd, mblk_no); if (!mblk) - return NULL; + return ERR_PTR(-ENOMEM); bio = bio_alloc(GFP_NOIO, 1); if (!bio) { dmz_free_mblock(zmd, mblk); - return NULL; + return ERR_PTR(-ENOMEM); } spin_lock(&zmd->mblk_lock); @@ -541,8 +544,8 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd, if (!mblk) { /* Cache miss: read the block from disk */ mblk = 
dmz_get_mblock_slow(zmd, mblk_no); - if (!mblk) - return ERR_PTR(-ENOMEM); + if (IS_ERR(mblk)) + return mblk; } /* Wait for on-going read I/O and check for error */ @@ -570,16 +573,19 @@ static void dmz_dirty_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk) /* * Issue a metadata block write BIO. */ -static void dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, - unsigned int set) +static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, + unsigned int set) { sector_t block = zmd->sb[set].block + mblk->no; struct bio *bio; + if (dmz_bdev_is_dying(zmd->dev)) + return -EIO; + bio = bio_alloc(GFP_NOIO, 1); if (!bio) { set_bit(DMZ_META_ERROR, &mblk->state); - return; + return -ENOMEM; } set_bit(DMZ_META_WRITING, &mblk->state); @@ -591,6 +597,8 @@ static void dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO); bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0); submit_bio(bio); + + return 0; } /* @@ -602,6 +610,9 @@ static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block, struct bio *bio; int ret; + if (dmz_bdev_is_dying(zmd->dev)) + return -EIO; + bio = bio_alloc(GFP_NOIO, 1); if (!bio) return -ENOMEM; @@ -659,22 +670,29 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd, { struct dmz_mblock *mblk; struct blk_plug plug; - int ret = 0; + int ret = 0, nr_mblks_submitted = 0; /* Issue writes */ blk_start_plug(&plug); - list_for_each_entry(mblk, write_list, link) - dmz_write_mblock(zmd, mblk, set); + list_for_each_entry(mblk, write_list, link) { + ret = dmz_write_mblock(zmd, mblk, set); + if (ret) + break; + nr_mblks_submitted++; + } blk_finish_plug(&plug); /* Wait for completion */ list_for_each_entry(mblk, write_list, link) { + if (!nr_mblks_submitted) + break; wait_on_bit_io(&mblk->state, DMZ_META_WRITING, TASK_UNINTERRUPTIBLE); if (test_bit(DMZ_META_ERROR, &mblk->state)) { clear_bit(DMZ_META_ERROR, &mblk->state); ret = 
-EIO; } + nr_mblks_submitted--; } /* Flush drive cache (this will also sync data) */ @@ -736,6 +754,11 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) */ dmz_lock_flush(zmd); + if (dmz_bdev_is_dying(zmd->dev)) { + ret = -EIO; + goto out; + } + /* Get dirty blocks */ spin_lock(&zmd->mblk_lock); list_splice_init(&zmd->mblk_dirty_list, &write_list); @@ -1631,6 +1654,10 @@ again: /* Alloate a random zone */ dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND); if (!dzone) { + if (dmz_bdev_is_dying(zmd->dev)) { + dzone = ERR_PTR(-EIO); + goto out; + } dmz_wait_for_free_zones(zmd); goto again; } @@ -1728,6 +1755,10 @@ again: /* Alloate a random zone */ bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND); if (!bzone) { + if (dmz_bdev_is_dying(zmd->dev)) { + bzone = ERR_PTR(-EIO); + goto out; + } dmz_wait_for_free_zones(zmd); goto again; } diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c index e381354dc136..9470b8f77a33 100644 --- a/drivers/md/dm-zoned-reclaim.c +++ b/drivers/md/dm-zoned-reclaim.c @@ -37,7 +37,7 @@ enum { /* * Number of seconds of target BIO inactivity to consider the target idle. 
*/ -#define DMZ_IDLE_PERIOD (10UL * HZ) +#define DMZ_IDLE_PERIOD (10UL * HZ) /* * Percentage of unmapped (free) random zones below which reclaim starts @@ -134,6 +134,9 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc, set_bit(DM_KCOPYD_WRITE_SEQ, &flags); while (block < end_block) { + if (dev->flags & DMZ_BDEV_DYING) + return -EIO; + /* Get a valid region from the source zone */ ret = dmz_first_valid_block(zmd, src_zone, &block); if (ret <= 0) @@ -451,6 +454,9 @@ static void dmz_reclaim_work(struct work_struct *work) unsigned int p_unmap_rnd; int ret; + if (dmz_bdev_is_dying(zrc->dev)) + return; + if (!dmz_should_reclaim(zrc)) { mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD); return; @@ -480,8 +486,16 @@ static void dmz_reclaim_work(struct work_struct *work) p_unmap_rnd, nr_unmap_rnd, nr_rnd); ret = dmz_do_reclaim(zrc); - if (ret) + if (ret) { dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret); + if (ret == -EIO) + /* + * LLD might be performing some error handling sequence + * at the underlying device. To not interfere, do not + * attempt to schedule the next reclaim run immediately. + */ + return; + } dmz_schedule_reclaim(zrc); } diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 944db71ed3d7..ff3fd011796e 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -133,6 +133,8 @@ static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone, refcount_inc(&bioctx->ref); generic_make_request(clone); + if (clone->bi_status == BLK_STS_IOERR) + return -EIO; if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone)) zone->wp_block += nr_blocks; @@ -277,8 +279,8 @@ static int dmz_handle_buffered_write(struct dmz_target *dmz, /* Get the buffer zone. 
One will be allocated if needed */ bzone = dmz_get_chunk_buffer(zmd, zone); - if (!bzone) - return -ENOSPC; + if (IS_ERR(bzone)) + return PTR_ERR(bzone); if (dmz_is_readonly(bzone)) return -EROFS; @@ -389,6 +391,11 @@ static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw, dmz_lock_metadata(zmd); + if (dmz->dev->flags & DMZ_BDEV_DYING) { + ret = -EIO; + goto out; + } + /* * Get the data zone mapping the chunk. There may be no * mapping for read and discard. If a mapping is obtained, @@ -493,6 +500,8 @@ static void dmz_flush_work(struct work_struct *work) /* Flush dirty metadata blocks */ ret = dmz_flush_metadata(dmz->metadata); + if (ret) + dmz_dev_debug(dmz->dev, "Metadata flush failed, rc=%d\n", ret); /* Process queued flush requests */ while (1) { @@ -556,6 +565,32 @@ out: return ret; } +/* + * Check the backing device availability. If it's on the way out, + * start failing I/O. Reclaim and metadata components also call this + * function to cleanly abort operation in the event of such failure. + */ +bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev) +{ + struct gendisk *disk; + + if (!(dmz_dev->flags & DMZ_BDEV_DYING)) { + disk = dmz_dev->bdev->bd_disk; + if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) { + dmz_dev_warn(dmz_dev, "Backing device queue dying"); + dmz_dev->flags |= DMZ_BDEV_DYING; + } else if (disk->fops->check_events) { + if (disk->fops->check_events(disk, 0) & + DISK_EVENT_MEDIA_CHANGE) { + dmz_dev_warn(dmz_dev, "Backing device offline"); + dmz_dev->flags |= DMZ_BDEV_DYING; + } + } + } + + return dmz_dev->flags & DMZ_BDEV_DYING; +} + /* * Process a new BIO. 
*/ @@ -569,6 +604,9 @@ static int dmz_map(struct dm_target *ti, struct bio *bio) sector_t chunk_sector; int ret; + if (dmz_bdev_is_dying(dmz->dev)) + return DM_MAPIO_KILL; + dmz_dev_debug(dev, "BIO op %d sector %llu + %u => chunk %llu, block %llu, %u blocks", bio_op(bio), (unsigned long long)sector, nr_sectors, (unsigned long long)dmz_bio_chunk(dmz->dev, bio), @@ -865,6 +903,9 @@ static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev) { struct dmz_target *dmz = ti->private; + if (dmz_bdev_is_dying(dmz->dev)) + return -ENODEV; + *bdev = dmz->dev->bdev; return 0; diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h index ed8de49c9a08..93a64529f219 100644 --- a/drivers/md/dm-zoned.h +++ b/drivers/md/dm-zoned.h @@ -56,6 +56,8 @@ struct dmz_dev { unsigned int nr_zones; + unsigned int flags; + sector_t zone_nr_sectors; unsigned int zone_nr_sectors_shift; @@ -67,6 +69,9 @@ struct dmz_dev { (dev)->zone_nr_sectors_shift) #define dmz_chunk_block(dev, b) ((b) & ((dev)->zone_nr_blocks - 1)) +/* Device flags. */ +#define DMZ_BDEV_DYING (1 << 0) + /* * Zone descriptor. 
*/ @@ -245,4 +250,9 @@ void dmz_resume_reclaim(struct dmz_reclaim *zrc); void dmz_reclaim_bio_acc(struct dmz_reclaim *zrc); void dmz_schedule_reclaim(struct dmz_reclaim *zrc); +/* + * Functions defined in dm-zoned-target.c + */ +bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev); + #endif /* DM_ZONED_H */ -- cgit v1.2.1 From bae9a0aa331d4cc20bd73c11f91abfceda4b7b29 Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Fri, 2 Aug 2019 15:02:50 -0700 Subject: dm zoned: add SPDX license identifiers Signed-off-by: Dmitry Fomichev Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 1 + drivers/md/dm-zoned-reclaim.c | 1 + drivers/md/dm-zoned-target.c | 1 + drivers/md/dm-zoned.h | 1 + 4 files changed, 4 insertions(+) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 2882897aece2..a033b5b1d77e 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2017 Western Digital Corporation or its affiliates. * diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c index 9470b8f77a33..8297b7558154 100644 --- a/drivers/md/dm-zoned-reclaim.c +++ b/drivers/md/dm-zoned-reclaim.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2017 Western Digital Corporation or its affiliates. * diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index ff3fd011796e..31478fef6032 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2017 Western Digital Corporation or its affiliates. * diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h index 93a64529f219..d8e70b0ade35 100644 --- a/drivers/md/dm-zoned.h +++ b/drivers/md/dm-zoned.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2017 Western Digital Corporation or its affiliates. 
* -- cgit v1.2.1 From ad1bd578bd5afdf20de0bead42d25f199601211d Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Fri, 2 Aug 2019 15:02:51 -0700 Subject: dm zoned: fix a few typos Signed-off-by: Dmitry Fomichev Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 8 ++++---- drivers/md/dm-zoned-reclaim.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index a033b5b1d77e..2a5bc51fd6d5 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -35,7 +35,7 @@ * (1) Super block (1 block) * (2) Chunk mapping table (nr_map_blocks) * (3) Bitmap blocks (nr_bitmap_blocks) - * All metadata blocks are stored in conventional zones, starting from the + * All metadata blocks are stored in conventional zones, starting from * the first conventional zone found on disk. */ struct dmz_super { @@ -234,7 +234,7 @@ void dmz_unlock_map(struct dmz_metadata *zmd) * Lock/unlock metadata access. This is a "read" lock on a semaphore * that prevents metadata flush from running while metadata are being * modified. The actual metadata write mutual exclusion is achieved with - * the map lock and zone styate management (active and reclaim state are + * the map lock and zone state management (active and reclaim state are * mutually exclusive). 
*/ void dmz_lock_metadata(struct dmz_metadata *zmd) @@ -1652,7 +1652,7 @@ again: if (op != REQ_OP_WRITE) goto out; - /* Alloate a random zone */ + /* Allocate a random zone */ dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND); if (!dzone) { if (dmz_bdev_is_dying(zmd->dev)) { @@ -1753,7 +1753,7 @@ again: if (bzone) goto out; - /* Alloate a random zone */ + /* Allocate a random zone */ bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND); if (!bzone) { if (dmz_bdev_is_dying(zmd->dev)) { diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c index 8297b7558154..d240d7ca8a8a 100644 --- a/drivers/md/dm-zoned-reclaim.c +++ b/drivers/md/dm-zoned-reclaim.c @@ -438,7 +438,7 @@ static bool dmz_should_reclaim(struct dmz_reclaim *zrc) return false; /* - * If the percentage of unmappped random zones is low, + * If the percentage of unmapped random zones is low, * reclaim even if the target is busy. */ return p_unmap_rnd <= DMZ_RECLAIM_LOW_UNMAP_RND; -- cgit v1.2.1 From 5729b6e5a1bcb0bbc28abe82d749c7392f66d2c7 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sat, 10 Aug 2019 12:30:27 -0400 Subject: dm integrity: fix a crash due to BUG_ON in __journal_read_write() Fix a crash that was introduced by the commit 724376a04d1a. The crash is reported here: https://gitlab.com/cryptsetup/cryptsetup/issues/468 When reading from the integrity device, the function dm_integrity_map_continue calls find_journal_node to find out if the location to read is present in the journal. Then, it calculates how many sectors are consecutively stored in the journal. Then, it locks the range with add_new_range and wait_and_add_new_range. The problem is that during wait_and_add_new_range, we hold no locks (we don't hold ic->endio_wait.lock and we don't hold a range lock), so the journal may change arbitrarily while wait_and_add_new_range sleeps. The code then goes to __journal_read_write and hits BUG_ON(journal_entry_get_sector(je) != logical_sector); because the journal has changed. 
In order to fix this bug, we need to re-check the journal location after wait_and_add_new_range. We restrict the length to one block in order to not complicate the code too much. Fixes: 724376a04d1a ("dm integrity: implement fair range locks") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index b1b0de402dfc..9118ab85cb3a 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1943,7 +1943,22 @@ offload_to_thread: queue_work(ic->wait_wq, &dio->work); return; } + if (journal_read_pos != NOT_FOUND) + dio->range.n_sectors = ic->sectors_per_block; wait_and_add_new_range(ic, &dio->range); + /* + * wait_and_add_new_range drops the spinlock, so the journal + * may have been changed arbitrarily. We need to recheck. + * To simplify the code, we restrict I/O size to just one block. + */ + if (journal_read_pos != NOT_FOUND) { + sector_t next_sector; + unsigned new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector); + if (unlikely(new_pos != journal_read_pos)) { + remove_range_unlocked(ic, &dio->range); + goto retry; + } + } } spin_unlock_irq(&ic->endio_wait.lock); -- cgit v1.2.1 From bebd6997163addc1938db8c61754a23ffdf8ccc4 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 15 Aug 2019 16:18:26 -0400 Subject: nfsd: initialize i_private before d_add A process could race in an open and attempt to read one of these files before i_private is initialized, and get a spurious error. Reported-by: Al Viro Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfsctl.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index b14f825c62fe..3cf4f6aa48d6 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1171,13 +1171,17 @@ static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode) return inode; } -static int __nfsd_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +static int __nfsd_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode, struct nfsdfs_client *ncl) { struct inode *inode; inode = nfsd_get_inode(dir->i_sb, mode); if (!inode) return -ENOMEM; + if (ncl) { + inode->i_private = ncl; + kref_get(&ncl->cl_ref); + } d_add(dentry, inode); inc_nlink(dir); fsnotify_mkdir(dir, dentry); @@ -1194,13 +1198,9 @@ static struct dentry *nfsd_mkdir(struct dentry *parent, struct nfsdfs_client *nc dentry = d_alloc_name(parent, name); if (!dentry) goto out_err; - ret = __nfsd_mkdir(d_inode(parent), dentry, S_IFDIR | 0600); + ret = __nfsd_mkdir(d_inode(parent), dentry, S_IFDIR | 0600, ncl); if (ret) goto out_err; - if (ncl) { - d_inode(dentry)->i_private = ncl; - kref_get(&ncl->cl_ref); - } out: inode_unlock(dir); return dentry; -- cgit v1.2.1 From 32d3182cd2cd29b2e7e04df7b0db350fbe11289f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Aug 2019 02:11:57 -0700 Subject: net/packet: fix race in tpacket_snd() packet_sendmsg() checks tx_ring.pg_vec to decide if it must call tpacket_snd(). Problem is that the check is lockless, meaning another thread can issue a concurrent setsockopt(PACKET_TX_RING ) to flip tx_ring.pg_vec back to NULL. Given that tpacket_snd() grabs pg_vec_lock mutex, we can perform the check again to solve the race. 
syzbot reported : kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 11429 Comm: syz-executor394 Not tainted 5.3.0-rc4+ #101 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:packet_lookup_frame+0x8d/0x270 net/packet/af_packet.c:474 Code: c1 ee 03 f7 73 0c 80 3c 0e 00 0f 85 cb 01 00 00 48 8b 0b 89 c0 4c 8d 24 c1 48 b8 00 00 00 00 00 fc ff df 4c 89 e1 48 c1 e9 03 <80> 3c 01 00 0f 85 94 01 00 00 48 8d 7b 10 4d 8b 3c 24 48 b8 00 00 RSP: 0018:ffff88809f82f7b8 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffff8880a45c7030 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 1ffff110148b8e06 RDI: ffff8880a45c703c RBP: ffff88809f82f7e8 R08: ffff888087aea200 R09: fffffbfff134ae50 R10: fffffbfff134ae4f R11: ffffffff89a5727f R12: 0000000000000000 R13: 0000000000000001 R14: ffff8880a45c6ac0 R15: 0000000000000000 FS: 00007fa04716f700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa04716edb8 CR3: 0000000091eb4000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: packet_current_frame net/packet/af_packet.c:487 [inline] tpacket_snd net/packet/af_packet.c:2667 [inline] packet_sendmsg+0x590/0x6250 net/packet/af_packet.c:2975 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg+0xd7/0x130 net/socket.c:657 ___sys_sendmsg+0x3e2/0x920 net/socket.c:2311 __sys_sendmmsg+0x1bf/0x4d0 net/socket.c:2413 __do_sys_sendmmsg net/socket.c:2442 [inline] __se_sys_sendmmsg net/socket.c:2439 [inline] __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2439 do_syscall_64+0xfd/0x6a0 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 69e3c75f4d54 ("net: TX_RING and packet mmap") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: 
David S. Miller --- net/packet/af_packet.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8d54f3047768..e2742b006d25 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2618,6 +2618,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) mutex_lock(&po->pg_vec_lock); + /* packet_sendmsg() check on tx_ring.pg_vec was lockless, + * we need to confirm it under protection of pg_vec_lock. + */ + if (unlikely(!po->tx_ring.pg_vec)) { + err = -EBUSY; + goto out; + } if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; -- cgit v1.2.1 From d34b044038bfb0e19caa8b019910efc465f41d5f Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 15 Aug 2019 15:22:23 +0100 Subject: tools: bpftool: close prog FD before exit on showing a single program When showing metadata about a single program by invoking "bpftool prog show PROG", the file descriptor referring to the program is not closed before returning from the function. Let's close it. Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool") Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Acked-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/prog.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 66f04a4846a5..43fdbbfe41bb 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -363,7 +363,9 @@ static int do_show(int argc, char **argv) if (fd < 0) return -1; - return show_prog(fd); + err = show_prog(fd); + close(fd); + return err; } if (argc) -- cgit v1.2.1 From a53358a31c989c360ea59536d28762b9d2d68d19 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Mon, 5 Aug 2019 18:18:37 +0200 Subject: drm: rcar_lvds: Fix dual link mode operations The R-Car LVDS encoder units support dual-link operations by splitting the pixel output between the primary encoder and the companion encoder. 
Currently the companion encoder fails at probe time, causing the registration of the primary to fail as well, preventing the whole DU unit from being registered at all. Fix this by not bailing out from probe with error if the "renesas,companion" property is not specified. Fixes: fa440d870358 ("drm: rcar-du: lvds: Add support for dual-link mode") Reported-by: Fabrizio Castro Signed-off-by: Jacopo Mondi Reviewed-by: Laurent Pinchart Signed-off-by: Laurent Pinchart --- drivers/gpu/drm/rcar-du/rcar_lvds.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/rcar-du/rcar_lvds.c b/drivers/gpu/drm/rcar-du/rcar_lvds.c index 1c62578590f4..082d02c84024 100644 --- a/drivers/gpu/drm/rcar-du/rcar_lvds.c +++ b/drivers/gpu/drm/rcar-du/rcar_lvds.c @@ -673,10 +673,8 @@ static int rcar_lvds_parse_dt_companion(struct rcar_lvds *lvds) /* Locate the companion LVDS encoder for dual-link operation, if any. */ companion = of_parse_phandle(dev->of_node, "renesas,companion", 0); - if (!companion) { - dev_err(dev, "Companion LVDS encoder not found\n"); - return -ENXIO; - } + if (!companion) + return 0; /* * Sanity check: the companion encoder must have the same compatible -- cgit v1.2.1 From 4f8c6aba37da199155a121c6cdc38505a9eb0259 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 13 Aug 2019 14:41:47 -0700 Subject: clk: Fix falling back to legacy parent string matching Calls to clk_core_get() will return ERR_PTR(-EINVAL) if we've started migrating a clk driver to use the DT based style of specifying parents but we haven't made any DT updates yet. This happens when we pass a non-NULL value as the 'name' argument of of_parse_clkspec(). That function returns -EINVAL in such a situation, instead of -ENOENT like we expected. The return value comes back up to clk_core_fill_parent_index() which proceeds to skip calling clk_core_lookup() because the error pointer isn't equal to -ENOENT, it's -EINVAL. 
Furthermore, we blindly overwrite the error pointer returned by clk_core_get() with NULL when there isn't a legacy .name member specified in the parent map. This isn't too bad right now because we don't really care to differentiate NULL from an error, but in the future we should only try to do a legacy lookup if we know we might find something. This way DT lookups that fail don't try to lookup based on strings when there isn't any string to match, hiding the error from DT parsing. Fix both these problems so that clk provider drivers can use the new style of parent mapping without having to also update their DT at the same time. This patch is based on an earlier patch from Taniya Das which checked for -EINVAL in addition to -ENOENT return values from clk_core_get(). Fixes: 601b6e93304a ("clk: Allow parents to be specified via clkspec index") Cc: Taniya Das Cc: Jerome Brunet Cc: Chen-Yu Tsai Reported-by: Taniya Das Signed-off-by: Stephen Boyd Link: https://lkml.kernel.org/r/20190813214147.34394-1-sboyd@kernel.org Tested-by: Taniya Das --- drivers/clk/clk.c | 46 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index c0990703ce54..8bce6bb4a965 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -324,6 +324,25 @@ static struct clk_core *clk_core_lookup(const char *name) return NULL; } +#ifdef CONFIG_OF +static int of_parse_clkspec(const struct device_node *np, int index, + const char *name, struct of_phandle_args *out_args); +static struct clk_hw * +of_clk_get_hw_from_clkspec(struct of_phandle_args *clkspec); +#else +static inline int of_parse_clkspec(const struct device_node *np, int index, + const char *name, + struct of_phandle_args *out_args) +{ + return -ENOENT; +} +static inline struct clk_hw * +of_clk_get_hw_from_clkspec(struct of_phandle_args *clkspec) +{ + return ERR_PTR(-ENOENT); +} +#endif + /** * clk_core_get - Find the clk_core parent of a clk * @core: 
clk to find parent of @@ -355,8 +374,9 @@ static struct clk_core *clk_core_lookup(const char *name) * }; * * Returns: -ENOENT when the provider can't be found or the clk doesn't - * exist in the provider. -EINVAL when the name can't be found. NULL when the - * provider knows about the clk but it isn't provided on this system. + * exist in the provider or the name can't be found in the DT node or + * in a clkdev lookup. NULL when the provider knows about the clk but it + * isn't provided on this system. * A valid clk_core pointer when the clk can be found in the provider. */ static struct clk_core *clk_core_get(struct clk_core *core, u8 p_index) @@ -367,17 +387,19 @@ static struct clk_core *clk_core_get(struct clk_core *core, u8 p_index) struct device *dev = core->dev; const char *dev_id = dev ? dev_name(dev) : NULL; struct device_node *np = core->of_node; + struct of_phandle_args clkspec; - if (np && (name || index >= 0)) - hw = of_clk_get_hw(np, index, name); - - /* - * If the DT search above couldn't find the provider or the provider - * didn't know about this clk, fallback to looking up via clkdev based - * clk_lookups - */ - if (PTR_ERR(hw) == -ENOENT && name) + if (np && (name || index >= 0) && + !of_parse_clkspec(np, index, name, &clkspec)) { + hw = of_clk_get_hw_from_clkspec(&clkspec); + of_node_put(clkspec.np); + } else if (name) { + /* + * If the DT search above couldn't find the provider fallback to + * looking up via clkdev based clk_lookups. 
+ */ hw = clk_find_hw(dev_id, name); + } if (IS_ERR(hw)) return ERR_CAST(hw); @@ -401,7 +423,7 @@ static void clk_core_fill_parent_index(struct clk_core *core, u8 index) parent = ERR_PTR(-EPROBE_DEFER); } else { parent = clk_core_get(core, index); - if (IS_ERR(parent) && PTR_ERR(parent) == -ENOENT) + if (IS_ERR(parent) && PTR_ERR(parent) == -ENOENT && entry->name) parent = clk_core_lookup(entry->name); } -- cgit v1.2.1 From 24876f09a7dfe36a82f53d304d8c1bceb3257a0f Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Fri, 16 Aug 2019 00:31:55 +0200 Subject: clk: Fix potential NULL dereference in clk_fetch_parent_index() Don't compare the parent clock name with a NULL name in the clk_parent_map. This prevents a kernel crash when passing NULL core->parents[i].name to strcmp(). An example which triggered this is a mux clock with four parents when each of them is referenced in the clock driver using clk_parent_data.fw_name and then calling clk_set_parent(clk, 3rd_parent) on this mux. In this case the first parent is also the HW default so core->parents[i].hw is populated when the clock is registered. Calling clk_set_parent(clk, 3rd_parent) will then go through all parents and skip the first parent because its hw pointer doesn't match. For the second parent no hw pointer is cached yet and clk_core_get(core, 1) returns a non-matching pointer (which is correct because we are comparing the second with the third parent). Comparing the result of clk_core_get(core, 2) with the requested parent gives a match. However we don't reach this point because right after the clk_core_get(core, 1) mismatch the old code tried to !strcmp(parent->name, NULL) (where the second argument is actually core->parents[i].name, but that was never populated by the clock driver).
Signed-off-by: Martin Blumenstingl Link: https://lkml.kernel.org/r/20190815223155.21384-1-martin.blumenstingl@googlemail.com Fixes: fc0c209c147f ("clk: Allow parents to be specified without string names") Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 8bce6bb4a965..1c46babeb093 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1654,7 +1654,8 @@ static int clk_fetch_parent_index(struct clk_core *core, break; /* Fallback to comparing globally unique names */ - if (!strcmp(parent->name, core->parents[i].name)) + if (core->parents[i].name && + !strcmp(parent->name, core->parents[i].name)) break; } -- cgit v1.2.1 From 78e70e780b289ff59ec33a9f9c1fcecaf17a46e1 Mon Sep 17 00:00:00 2001 From: He Zhe Date: Tue, 6 Aug 2019 17:41:04 +0800 Subject: nfsd4: Fix kernel crash when reading proc file reply_cache_stats reply_cache_stats uses wrong parameter as seq file private structure and thus causes the following kernel crash when users read /proc/fs/nfsd/reply_cache_stats BUG: kernel NULL pointer dereference, address: 00000000000001f9 PGD 0 P4D 0 Oops: 0000 [#3] SMP PTI CPU: 6 PID: 1502 Comm: cat Tainted: G D 5.3.0-rc3+ #1 Hardware name: Intel Corporation Broadwell Client platform/Basking Ridge, BIOS BDW-E2R1.86C.0118.R01.1503110618 03/11/2015 RIP: 0010:nfsd_reply_cache_stats_show+0x3b/0x2d0 Code: 41 54 49 89 f4 48 89 fe 48 c7 c7 b3 10 33 88 53 bb e8 03 00 00 e8 88 82 d1 ff bf 58 89 41 00 e8 eb c5 85 00 48 83 eb 01 75 f0 <41> 8b 94 24 f8 01 00 00 48 c7 c6 be 10 33 88 4c 89 ef bb e8 03 00 RSP: 0018:ffffaa520106fe08 EFLAGS: 00010246 RAX: 000000cfe1a77123 RBX: 0000000000000000 RCX: 0000000000291b46 RDX: 000000cf00000000 RSI: 0000000000000006 RDI: 0000000000291b28 RBP: ffffaa520106fe20 R08: 0000000000000006 R09: 000000cfe17e55dd R10: ffffa424e47c0000 R11: 000000000000030b R12: 0000000000000001 R13: ffffa424e5697000 R14: 0000000000000001 R15: ffffa424e5697000 FS: 
00007f805735f580(0000) GS:ffffa424f8f80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000001f9 CR3: 00000000655ce005 CR4: 00000000003606e0 Call Trace: seq_read+0x194/0x3e0 __vfs_read+0x1b/0x40 vfs_read+0x95/0x140 ksys_read+0x61/0xe0 __x64_sys_read+0x1a/0x20 do_syscall_64+0x4d/0x120 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f805728b861 Code: fe ff ff 50 48 8d 3d 86 b4 09 00 e8 79 e0 01 00 66 0f 1f 84 00 00 00 00 00 48 8d 05 d9 19 0d 00 8b 00 85 c0 75 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 57 c3 66 0f 1f 44 00 00 48 83 ec 28 48 89 54 RSP: 002b:00007ffea1ce3c38 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f805728b861 RDX: 0000000000020000 RSI: 00007f8057183000 RDI: 0000000000000003 RBP: 00007f8057183000 R08: 00007f8057182010 R09: 0000000000000000 R10: 0000000000000022 R11: 0000000000000246 R12: 0000559a60e8ff10 R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000 Modules linked in: CR2: 00000000000001f9 ---[ end trace 01613595153f0cba ]--- RIP: 0010:nfsd_reply_cache_stats_show+0x3b/0x2d0 Code: 41 54 49 89 f4 48 89 fe 48 c7 c7 b3 10 33 88 53 bb e8 03 00 00 e8 88 82 d1 ff bf 58 89 41 00 e8 eb c5 85 00 48 83 eb 01 75 f0 <41> 8b 94 24 f8 01 00 00 48 c7 c6 be 10 33 88 4c 89 ef bb e8 03 00 RSP: 0018:ffffaa52004b3e08 EFLAGS: 00010246 RAX: 0000002bab45a7c6 RBX: 0000000000000000 RCX: 0000000000291b4c RDX: 0000002b00000000 RSI: 0000000000000004 RDI: 0000000000291b28 RBP: ffffaa52004b3e20 R08: 0000000000000004 R09: 0000002bab1c8c7a R10: ffffa424e5500000 R11: 00000000000002a9 R12: 0000000000000001 R13: ffffa424e4475000 R14: 0000000000000001 R15: ffffa424e4475000 FS: 00007f805735f580(0000) GS:ffffa424f8f80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000001f9 CR3: 00000000655ce005 CR4: 00000000003606e0 Killed Fixes: 3ba75830ce17 ("nfsd4: drc containerization") Signed-off-by: He Zhe Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfscache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 26ad75ae2be0..96352ab7bd81 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -571,7 +571,7 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) */ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) { - struct nfsd_net *nn = v; + struct nfsd_net *nn = m->private; seq_printf(m, "max entries: %u\n", nn->max_drc_entries); seq_printf(m, "num entries: %u\n", -- cgit v1.2.1 From 5a69e4980258c56f6d4c2048a1b9c260218785b7 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Wed, 14 Aug 2019 15:58:00 +0300 Subject: MAINTAINERS: PHY LIBRARY: Update files in the record Update MAINTAINERS to reflect that sysfs-bus-mdio was removed in commit a6cd0d2d493a ("Documentation: net-sysfs: Remove duplicate PHY device documentation") and sysfs-class-net-phydev was added in commit 86f22d04dfb5 ("net: sysfs: Document PHY device sysfs attributes"). Cc: Florian Fainelli Cc: Andrew Lunn Cc: Heiner Kallweit Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Denis Efremov Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 47800d32cfbc..d7e44a29b13e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6065,7 +6065,7 @@ M: Florian Fainelli M: Heiner Kallweit L: netdev@vger.kernel.org S: Maintained -F: Documentation/ABI/testing/sysfs-bus-mdio +F: Documentation/ABI/testing/sysfs-class-net-phydev F: Documentation/devicetree/bindings/net/ethernet-phy.yaml F: Documentation/devicetree/bindings/net/mdio* F: Documentation/networking/phy.rst -- cgit v1.2.1 From 0a66c20a6a123d6dc96c6197f02455cb64615271 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Wed, 14 Aug 2019 15:12:09 +0300 Subject: MAINTAINERS: r8169: Update path to the driver Update MAINTAINERS record to reflect the filename change. 
The file was moved in commit 25e992a4603c ("r8169: rename r8169.c to r8169_main.c") Cc: Heiner Kallweit Cc: nic_swsd@realtek.com Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Denis Efremov Reviewed-by: Heiner Kallweit Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d7e44a29b13e..a416574780d6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -183,7 +183,7 @@ M: Realtek linux nic maintainers M: Heiner Kallweit L: netdev@vger.kernel.org S: Maintained -F: drivers/net/ethernet/realtek/r8169.c +F: drivers/net/ethernet/realtek/r8169* 8250/16?50 (AND CLONE UARTS) SERIAL DRIVER M: Greg Kroah-Hartman -- cgit v1.2.1 From b9cbf8a64865b50fd0f4a3915fa00ac7365cdf8f Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Wed, 14 Aug 2019 11:23:13 -0500 Subject: lan78xx: Fix memory leaks In lan78xx_probe(), a new urb is allocated through usb_alloc_urb() and saved to 'dev->urb_intr'. However, in the following execution, if an error occurs, 'dev->urb_intr' is not deallocated, leading to memory leaks. To fix this issue, invoke usb_free_urb() to free the allocated urb before returning from the function. Signed-off-by: Wenwen Wang Signed-off-by: David S. 
Miller --- drivers/net/usb/lan78xx.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 3d92ea6fcc02..f033fee225a1 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3792,7 +3792,7 @@ static int lan78xx_probe(struct usb_interface *intf, ret = register_netdev(netdev); if (ret != 0) { netif_err(dev, probe, netdev, "couldn't register the device\n"); - goto out3; + goto out4; } usb_set_intfdata(intf, dev); @@ -3807,12 +3807,14 @@ static int lan78xx_probe(struct usb_interface *intf, ret = lan78xx_phy_init(dev); if (ret < 0) - goto out4; + goto out5; return 0; -out4: +out5: unregister_netdev(netdev); +out4: + usb_free_urb(dev->urb_intr); out3: lan78xx_unbind(dev, intf); out2: -- cgit v1.2.1 From df451f83e1fc0fa3764a2724b0faaaf9d07ab1b6 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Fri, 16 Aug 2019 18:50:00 +0200 Subject: gpio: of: fix Freescale SPI CS quirk handling On the gta04 we see: spi_gpio: probe of spi_lcd failed with error -2 The quirk introduced in commit e3023bf80639 ("gpio: of: Handle the Freescale SPI CS") can also be triggered by a temporary -EPROBE_DEFER and so "convert" it to a hard -ENOENT. Disable that conversion by checking for -EPROBE_DEFER. Fixes: e3023bf80639 ("gpio: of: Handle the Freescale SPI CS") Suggested-by: H. 
Nikolaus Schaller Signed-off-by: Andreas Kemnade Link: https://lore.kernel.org/r/20190816165000.32334-1-andreas@kemnade.info Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-of.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 567fb98c0892..9762dd6d99fa 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -363,7 +363,7 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id, /* Special handling for SPI GPIOs if used */ if (IS_ERR(desc)) desc = of_find_spi_gpio(dev, con_id, &of_flags); - if (IS_ERR(desc)) { + if (IS_ERR(desc) && PTR_ERR(desc) != -EPROBE_DEFER) { /* This quirk looks up flags and all */ desc = of_find_spi_cs_gpio(dev, con_id, idx, flags); if (!IS_ERR(desc)) -- cgit v1.2.1 From 712042313b23b5df7451faf4b279beb3025e990c Mon Sep 17 00:00:00 2001 From: Tuong Lien Date: Thu, 15 Aug 2019 10:24:08 +0700 Subject: tipc: fix false detection of retransmit failures This commit eliminates the use of the link 'stale_limit' & 'prev_from' (besides the already removed - 'stale_cnt') variables in the detection of repeated retransmit failures as there is no proper way to initialize them to avoid a false detection, i.e. it is not really a retransmission failure but due to garbage values in the variables. Instead, a jiffies variable will be added to individual skbs (like the way we restrict the skb retransmissions) in order to mark the first skb retransmit time. Later on, at the next retransmissions, the timestamp will be checked to see if the skb in the link transmq is "too stale", that is, the link tolerance time has passed, so that a link reset will be ordered. Note, just checking on the first skb in the queue is fine enough since it must be the oldest one. A counter is also added to keep track of the actual number of skb retransmissions for later checking when the failure happens.
The downside of this approach is that the skb->cb[] buffer is about to be exhausted, however it is always able to allocate another memory area and keep a reference to it when needed. Fixes: 77cf8edbc0e7 ("tipc: simplify stale link failure criteria") Reported-by: Hoang Le Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Tuong Lien Signed-off-by: David S. Miller --- net/tipc/link.c | 92 ++++++++++++++++++++++++++++++++------------------------- net/tipc/msg.h | 8 +++-- 2 files changed, 57 insertions(+), 43 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 66d3a07bc571..c2c5c53cad22 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -106,8 +106,6 @@ struct tipc_stats { * @transmitq: queue for sent, non-acked messages * @backlogq: queue for messages waiting to be sent * @snt_nxt: next sequence number to use for outbound messages - * @prev_from: sequence number of most previous retransmission request - * @stale_limit: time when repeated identical retransmits must force link reset * @ackers: # of peers that needs to ack each packet before it can be released * @acked: # last packet acked by a certain peer. Used for broadcast. 
* @rcv_nxt: next sequence number to expect for inbound messages @@ -164,9 +162,7 @@ struct tipc_link { u16 limit; } backlog[5]; u16 snd_nxt; - u16 prev_from; u16 window; - unsigned long stale_limit; /* Reception */ u16 rcv_nxt; @@ -1044,47 +1040,53 @@ static void tipc_link_advance_backlog(struct tipc_link *l, * link_retransmit_failure() - Detect repeated retransmit failures * @l: tipc link sender * @r: tipc link receiver (= l in case of unicast) - * @from: seqno of the 1st packet in retransmit request * @rc: returned code * * Return: true if the repeated retransmit failures happens, otherwise * false */ static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r, - u16 from, int *rc) + int *rc) { struct sk_buff *skb = skb_peek(&l->transmq); struct tipc_msg *hdr; if (!skb) return false; - hdr = buf_msg(skb); - /* Detect repeated retransmit failures on same packet */ - if (r->prev_from != from) { - r->prev_from = from; - r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance); - } else if (time_after(jiffies, r->stale_limit)) { - pr_warn("Retransmission failure on link <%s>\n", l->name); - link_print(l, "State of link "); - pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", - msg_user(hdr), msg_type(hdr), msg_size(hdr), - msg_errcode(hdr)); - pr_info("sqno %u, prev: %x, src: %x\n", - msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr)); - - trace_tipc_list_dump(&l->transmq, true, "retrans failure!"); - trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!"); - trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!"); + if (!TIPC_SKB_CB(skb)->retr_cnt) + return false; - if (link_is_bc_sndlink(l)) - *rc = TIPC_LINK_DOWN_EVT; + if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp + + msecs_to_jiffies(r->tolerance))) + return false; + + hdr = buf_msg(skb); + if (link_is_bc_sndlink(l) && !less(r->acked, msg_seqno(hdr))) + return false; + pr_warn("Retransmission failure on link <%s>\n", l->name); + link_print(l, "State of link "); + 
pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", + msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr)); + pr_info("sqno %u, prev: %x, dest: %x\n", + msg_seqno(hdr), msg_prevnode(hdr), msg_destnode(hdr)); + pr_info("retr_stamp %d, retr_cnt %d\n", + jiffies_to_msecs(TIPC_SKB_CB(skb)->retr_stamp), + TIPC_SKB_CB(skb)->retr_cnt); + + trace_tipc_list_dump(&l->transmq, true, "retrans failure!"); + trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!"); + trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!"); + + if (link_is_bc_sndlink(l)) { + r->state = LINK_RESET; + *rc = TIPC_LINK_DOWN_EVT; + } else { *rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); - return true; } - return false; + return true; } /* tipc_link_bc_retrans() - retransmit zero or more packets @@ -1110,7 +1112,7 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r, trace_tipc_link_retrans(r, from, to, &l->transmq); - if (link_retransmit_failure(l, r, from, &rc)) + if (link_retransmit_failure(l, r, &rc)) return rc; skb_queue_walk(&l->transmq, skb) { @@ -1119,11 +1121,10 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r, continue; if (more(msg_seqno(hdr), to)) break; - if (link_is_bc_sndlink(l)) { - if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) - continue; - TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; - } + + if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) + continue; + TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, GFP_ATOMIC); if (!_skb) return 0; @@ -1133,6 +1134,10 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r, _skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, _skb); l->stats.retransmitted++; + + /* Increase actual retrans counter & mark first time */ + if (!TIPC_SKB_CB(skb)->retr_cnt++) + TIPC_SKB_CB(skb)->retr_stamp = jiffies; } return 0; } @@ -1357,12 +1362,10 @@ static int tipc_link_advance_transmq(struct tipc_link *l, 
u16 acked, u16 gap, struct tipc_msg *hdr; u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; u16 ack = l->rcv_nxt - 1; + bool passed = false; u16 seqno, n = 0; int rc = 0; - if (gap && link_retransmit_failure(l, l, acked + 1, &rc)) - return rc; - skb_queue_walk_safe(&l->transmq, skb, tmp) { seqno = buf_seqno(skb); @@ -1372,12 +1375,17 @@ next_gap_ack: __skb_unlink(skb, &l->transmq); kfree_skb(skb); } else if (less_eq(seqno, acked + gap)) { - /* retransmit skb */ + /* First, check if repeated retrans failures occurs? */ + if (!passed && link_retransmit_failure(l, l, &rc)) + return rc; + passed = true; + + /* retransmit skb if unrestricted*/ if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) continue; TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME; - - _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC); + _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, + GFP_ATOMIC); if (!_skb) continue; hdr = buf_msg(_skb); @@ -1386,6 +1394,10 @@ next_gap_ack: _skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, _skb); l->stats.retransmitted++; + + /* Increase actual retrans counter & mark first time */ + if (!TIPC_SKB_CB(skb)->retr_cnt++) + TIPC_SKB_CB(skb)->retr_stamp = jiffies; } else { /* retry with Gap ACK blocks if any */ if (!ga || n >= ga->gack_cnt) @@ -2577,7 +2589,7 @@ int tipc_link_dump(struct tipc_link *l, u16 dqueues, char *buf) i += scnprintf(buf + i, sz - i, " %x", l->peer_caps); i += scnprintf(buf + i, sz - i, " %u", l->silent_intv_cnt); i += scnprintf(buf + i, sz - i, " %u", l->rst_cnt); - i += scnprintf(buf + i, sz - i, " %u", l->prev_from); + i += scnprintf(buf + i, sz - i, " %u", 0); i += scnprintf(buf + i, sz - i, " %u", 0); i += scnprintf(buf + i, sz - i, " %u", l->acked); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index da509f0eb9ca..d7ebc9e955f6 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -102,13 +102,15 @@ struct plist; #define TIPC_MEDIA_INFO_OFFSET 5 struct tipc_skb_cb { - u32 bytes_read; - u32 orig_member; struct sk_buff *tail; unsigned long 
nxt_retr; - bool validated; + unsigned long retr_stamp; + u32 bytes_read; + u32 orig_member; u16 chain_imp; u16 ackers; + u16 retr_cnt; + bool validated; }; #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) -- cgit v1.2.1 From 314e01a6d7ddf04608440beb087b21d8aa32f03f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Aug 2019 23:41:06 -0700 Subject: xfs: fall back to native ioctls for unhandled compat ones Always try the native ioctl if we don't have a compat handler. This removes a lot of boilerplate code as 'modern' ioctls should generally be compat clean, and fixes the missing entries for the recently added FS_IOC_GETFSLABEL/FS_IOC_SETFSLABEL ioctls. Fixes: f7664b31975b ("xfs: implement online get/set fs label") Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_ioctl32.c | 54 ++-------------------------------------------------- 1 file changed, 2 insertions(+), 52 deletions(-) diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 7fcf7569743f..bae08ef92ac3 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -553,57 +553,6 @@ xfs_file_compat_ioctl( trace_xfs_file_compat_ioctl(ip); switch (cmd) { - /* No size or alignment issues on any arch */ - case XFS_IOC_DIOINFO: - case XFS_IOC_FSGEOMETRY_V4: - case XFS_IOC_FSGEOMETRY: - case XFS_IOC_AG_GEOMETRY: - case XFS_IOC_FSGETXATTR: - case XFS_IOC_FSSETXATTR: - case XFS_IOC_FSGETXATTRA: - case XFS_IOC_FSSETDM: - case XFS_IOC_GETBMAP: - case XFS_IOC_GETBMAPA: - case XFS_IOC_GETBMAPX: - case XFS_IOC_FSCOUNTS: - case XFS_IOC_SET_RESBLKS: - case XFS_IOC_GET_RESBLKS: - case XFS_IOC_FSGROWFSLOG: - case XFS_IOC_GOINGDOWN: - case XFS_IOC_ERROR_INJECTION: - case XFS_IOC_ERROR_CLEARALL: - case FS_IOC_GETFSMAP: - case XFS_IOC_SCRUB_METADATA: - case XFS_IOC_BULKSTAT: - case XFS_IOC_INUMBERS: - return xfs_file_ioctl(filp, cmd, p); -#if !defined(BROKEN_X86_ALIGNMENT) || defined(CONFIG_X86_X32) - /* - 
* These are handled fine if no alignment issues. To support x32 - * which uses native 64-bit alignment we must emit these cases in - * addition to the ia-32 compat set below. - */ - case XFS_IOC_ALLOCSP: - case XFS_IOC_FREESP: - case XFS_IOC_RESVSP: - case XFS_IOC_UNRESVSP: - case XFS_IOC_ALLOCSP64: - case XFS_IOC_FREESP64: - case XFS_IOC_RESVSP64: - case XFS_IOC_UNRESVSP64: - case XFS_IOC_FSGEOMETRY_V1: - case XFS_IOC_FSGROWFSDATA: - case XFS_IOC_FSGROWFSRT: - case XFS_IOC_ZERO_RANGE: -#ifdef CONFIG_X86_X32 - /* - * x32 special: this gets a different cmd number from the ia-32 compat - * case below; the associated data will match native 64-bit alignment. - */ - case XFS_IOC_SWAPEXT: -#endif - return xfs_file_ioctl(filp, cmd, p); -#endif #if defined(BROKEN_X86_ALIGNMENT) case XFS_IOC_ALLOCSP_32: case XFS_IOC_FREESP_32: @@ -705,6 +654,7 @@ xfs_file_compat_ioctl( case XFS_IOC_FSSETDM_BY_HANDLE_32: return xfs_compat_fssetdm_by_handle(filp, arg); default: - return -ENOIOCTLCMD; + /* try the native version */ + return xfs_file_ioctl(filp, cmd, p); } } -- cgit v1.2.1 From 4529e6d7a6ab727aa85b1dd3cbfa9e82f10f730d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Aug 2019 23:41:06 -0700 Subject: xfs: compat_ioctl: use compat_ptr() For 31-bit s390 user space, we have to pass pointer arguments through compat_ptr() in the compat_ioctl handler. Signed-off-by: Arnd Bergmann Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/xfs_ioctl32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index bae08ef92ac3..7bd7534f5051 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -547,7 +547,7 @@ xfs_file_compat_ioctl( struct inode *inode = file_inode(filp); struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - void __user *arg = (void __user *)p; + void __user *arg = compat_ptr(p); int error; trace_xfs_file_compat_ioctl(ip); @@ -655,6 +655,6 @@ xfs_file_compat_ioctl( return xfs_compat_fssetdm_by_handle(filp, arg); default: /* try the native version */ - return xfs_file_ioctl(filp, cmd, p); + return xfs_file_ioctl(filp, cmd, (unsigned long)arg); } } -- cgit v1.2.1 From edc58dd0123b552453a74369bd0c8d890b497b4b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 11 Aug 2019 15:52:25 -0700 Subject: vfs: fix page locking deadlocks when deduping files When dedupe wants to use the page cache to compare parts of two files for dedupe, we must be very careful to handle locking correctly. The current code doesn't do this. It must lock and unlock the page only once if the two pages are the same, since the overlapping range check doesn't catch this when blocksize < pagesize. If the pages are distinct but from the same file, we must observe page locking order and lock them in order of increasing offset to avoid clashing with writeback locking. Fixes: 876bec6f9bbfcb3 ("vfs: refactor clone/dedupe_file_range common functions") Signed-off-by: Darrick J. 
Wong Reviewed-by: Bill O'Donnell Reviewed-by: Matthew Wilcox (Oracle) --- fs/read_write.c | 49 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 1f5088dec566..5bbf587f5bc1 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1811,10 +1811,7 @@ static int generic_remap_check_len(struct inode *inode_in, return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL; } -/* - * Read a page's worth of file data into the page cache. Return the page - * locked. - */ +/* Read a page's worth of file data into the page cache. */ static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset) { struct page *page; @@ -1826,10 +1823,32 @@ static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset) put_page(page); return ERR_PTR(-EIO); } - lock_page(page); return page; } +/* + * Lock two pages, ensuring that we lock in offset order if the pages are from + * the same file. + */ +static void vfs_lock_two_pages(struct page *page1, struct page *page2) +{ + /* Always lock in order of increasing index. */ + if (page1->index > page2->index) + swap(page1, page2); + + lock_page(page1); + if (page1 != page2) + lock_page(page2); +} + +/* Unlock two pages, being careful not to unlock the same page twice. */ +static void vfs_unlock_two_pages(struct page *page1, struct page *page2) +{ + unlock_page(page1); + if (page1 != page2) + unlock_page(page2); +} + /* * Compare extents of two files to see if they are the same. * Caller must have locked both inodes to prevent write races. 
@@ -1867,10 +1886,24 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, dest_page = vfs_dedupe_get_page(dest, destoff); if (IS_ERR(dest_page)) { error = PTR_ERR(dest_page); - unlock_page(src_page); put_page(src_page); goto out_error; } + + vfs_lock_two_pages(src_page, dest_page); + + /* + * Now that we've locked both pages, make sure they're still + * mapped to the file data we're interested in. If not, + * someone is invalidating pages on us and we lose. + */ + if (!PageUptodate(src_page) || !PageUptodate(dest_page) || + src_page->mapping != src->i_mapping || + dest_page->mapping != dest->i_mapping) { + same = false; + goto unlock; + } + src_addr = kmap_atomic(src_page); dest_addr = kmap_atomic(dest_page); @@ -1882,8 +1915,8 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, kunmap_atomic(dest_addr); kunmap_atomic(src_addr); - unlock_page(dest_page); - unlock_page(src_page); +unlock: + vfs_unlock_two_pages(src_page, dest_page); put_page(dest_page); put_page(src_page); -- cgit v1.2.1 From 12ece2d53d3e8f827e972caf497c165f7729c717 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 15 Aug 2019 11:16:24 -0700 Subject: x86/cpu: Explain Intel model naming convention Dave Hansen spelled out the rules in an e-mail: https://lkml.kernel.org/r/91eefbe4-e32b-d762-be4d-672ff915db47@intel.com Copy those right into the file to make it easy for people to find them. Suggested-by: Borislav Petkov Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Acked-by: Thomas Gleixner Cc: "H. 
Peter Anvin" Cc: Dave Hansen Cc: Ingo Molnar Cc: x86-ml Link: https://lkml.kernel.org/r/20190815224704.GA10025@agluck-desk2.amr.corp.intel.com --- arch/x86/include/asm/intel-family.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 0278aa66ef62..fe7c205233f1 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -11,6 +11,21 @@ * While adding a new CPUID for a new microarchitecture, add a new * group to keep logically sorted out in chronological order. Within * that group keep the CPUID for the variants sorted by model number. + * + * The defined symbol names have the following form: + * INTEL_FAM6{OPTFAMILY}_{MICROARCH}{OPTDIFF} + * where: + * OPTFAMILY Describes the family of CPUs that this belongs to. Default + * is assumed to be "_CORE" (and should be omitted). Other values + * currently in use are _ATOM and _XEON_PHI + * MICROARCH Is the code name for the micro-architecture for this core. + * N.B. Not the platform name. + * OPTDIFF If needed, a short string to differentiate by market segment. + * Exact strings here will vary over time. _DESKTOP, _MOBILE, and + * _X (short for Xeon server) should be used when they are + * appropriate. + * + * The #define line may optionally include a comment including platform names. */ #define INTEL_FAM6_CORE_YONAH 0x0E -- cgit v1.2.1 From 58a96fc35375ab87db7c5b69336f5befde1b548f Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 16 Jul 2019 20:34:41 +0200 Subject: Bluetooth: Add debug setting for changing minimum encryption key size For testing and qualification purposes it is useful to allow changing the minimum encryption key size value that the host stack is going to enforce. This adds a new debugfs setting min_encrypt_key_size to achieve this functionality. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 1 + net/bluetooth/hci_debugfs.c | 31 +++++++++++++++++++++++++++++++ net/bluetooth/l2cap_core.c | 2 +- 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index ded574b32c20..ffc95b382eb5 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -278,6 +278,7 @@ struct hci_dev { __u16 conn_info_min_age; __u16 conn_info_max_age; __u16 auth_payload_timeout; + __u8 min_enc_key_size; __u8 ssp_debug_mode; __u8 hw_error_code; __u32 clock; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b9585e7d9d2e..04bc79359a17 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3202,6 +3202,7 @@ struct hci_dev *hci_alloc_dev(void) hdev->conn_info_min_age = DEFAULT_CONN_INFO_MIN_AGE; hdev->conn_info_max_age = DEFAULT_CONN_INFO_MAX_AGE; hdev->auth_payload_timeout = DEFAULT_AUTH_PAYLOAD_TIMEOUT; + hdev->min_enc_key_size = HCI_MIN_ENC_KEY_SIZE; mutex_init(&hdev->lock); mutex_init(&hdev->req_lock); diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index bb67f4a5479a..402e2cc54044 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -433,6 +433,35 @@ static int auto_accept_delay_set(void *data, u64 val) return 0; } +static int min_encrypt_key_size_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 1 || val > 16) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->min_enc_key_size = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int min_encrypt_key_size_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->min_enc_key_size; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(min_encrypt_key_size_fops, + min_encrypt_key_size_get, + min_encrypt_key_size_set, "%llu\n"); + static int 
auto_accept_delay_get(void *data, u64 *val) { struct hci_dev *hdev = data; @@ -545,6 +574,8 @@ void hci_debugfs_create_bredr(struct hci_dev *hdev) if (lmp_ssp_capable(hdev)) { debugfs_create_file("ssp_debug_mode", 0444, hdev->debugfs, hdev, &ssp_debug_mode_fops); + debugfs_create_file("min_encrypt_key_size", 0644, hdev->debugfs, + hdev, &min_encrypt_key_size_fops); debugfs_create_file("auto_accept_delay", 0644, hdev->debugfs, hdev, &auto_accept_delay_fops); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index cc506fe99b4d..dfc1edb168b7 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1361,7 +1361,7 @@ static bool l2cap_check_enc_key_size(struct hci_conn *hcon) * actually encrypted before enforcing a key size. */ return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) || - hcon->enc_key_size >= HCI_MIN_ENC_KEY_SIZE); + hcon->enc_key_size >= hcon->hdev->min_enc_key_size); } static void l2cap_do_start(struct l2cap_chan *chan) -- cgit v1.2.1 From 4a4d2d372fb9b9229327e2ed01d5d9572eddf4de Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Sun, 18 Aug 2019 07:25:48 -0700 Subject: bnx2x: Fix VF's VLAN reconfiguration in reload. Commit 04f05230c5c13 ("bnx2x: Remove configured vlans as part of unload sequence."), introduced a regression in driver that as a part of VF's reload flow, VLANs created on the VF doesn't get re-configured in hardware as vlan metadata/info was not getting cleared for the VFs which causes vlan PING to stop. This patch clears the vlan metadata/info so that VLANs gets re-configured back in the hardware in VF's reload flow and PING/traffic continues for VLANs created over the VFs. Fixes: 04f05230c5c13 ("bnx2x: Remove configured vlans as part of unload sequence.") Signed-off-by: Manish Chopra Signed-off-by: Sudarsana Kalluru Signed-off-by: Shahed Shaikh Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 7 ++++--- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 2 ++ drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 17 ++++++++++++----- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index e47ea92e2ae3..d10b421ed1f1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3057,12 +3057,13 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) /* if VF indicate to PF this function is going down (PF will delete sp * elements and clear initializations */ - if (IS_VF(bp)) + if (IS_VF(bp)) { + bnx2x_clear_vlan_info(bp); bnx2x_vfpf_close_vf(bp); - else if (unload_mode != UNLOAD_RECOVERY) + } else if (unload_mode != UNLOAD_RECOVERY) { /* if this is a normal/close unload need to clean up chip*/ bnx2x_chip_cleanup(bp, unload_mode, keep_link); - else { + } else { /* Send the UNLOAD_REQUEST to the MCP */ bnx2x_send_unload_req(bp, unload_mode); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index c2f6e44e9a3f..8b08cb18e363 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -425,6 +425,8 @@ void bnx2x_set_reset_global(struct bnx2x *bp); void bnx2x_disable_close_the_gate(struct bnx2x *bp); int bnx2x_init_hw_func_cnic(struct bnx2x *bp); +void bnx2x_clear_vlan_info(struct bnx2x *bp); + /** * bnx2x_sp_event - handle ramrods completion. 
* diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 2cc14db8f0ec..192ff8d5da32 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -8482,11 +8482,21 @@ int bnx2x_set_vlan_one(struct bnx2x *bp, u16 vlan, return rc; } +void bnx2x_clear_vlan_info(struct bnx2x *bp) +{ + struct bnx2x_vlan_entry *vlan; + + /* Mark that hw forgot all entries */ + list_for_each_entry(vlan, &bp->vlan_reg, link) + vlan->hw = false; + + bp->vlan_cnt = 0; +} + static int bnx2x_del_all_vlans(struct bnx2x *bp) { struct bnx2x_vlan_mac_obj *vlan_obj = &bp->sp_objs[0].vlan_obj; unsigned long ramrod_flags = 0, vlan_flags = 0; - struct bnx2x_vlan_entry *vlan; int rc; __set_bit(RAMROD_COMP_WAIT, &ramrod_flags); @@ -8495,10 +8505,7 @@ static int bnx2x_del_all_vlans(struct bnx2x *bp) if (rc) return rc; - /* Mark that hw forgot all entries */ - list_for_each_entry(vlan, &bp->vlan_reg, link) - vlan->hw = false; - bp->vlan_cnt = 0; + bnx2x_clear_vlan_info(bp); return 0; } -- cgit v1.2.1 From 1eca92eef18719027d394bf1a2d276f43e7cf886 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Wed, 14 Aug 2019 13:03:38 -0500 Subject: cx82310_eth: fix a memory leak bug In cx82310_bind(), 'dev->partial_data' is allocated through kmalloc(). Then, the execution waits for the firmware to become ready. If the firmware is not ready in time, the execution is terminated. However, the allocated 'dev->partial_data' is not deallocated on this path, leading to a memory leak bug. To fix this issue, free 'dev->partial_data' before returning the error. Signed-off-by: Wenwen Wang Signed-off-by: David S. 
Miller --- drivers/net/usb/cx82310_eth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c index 5519248a791e..32b08b18e120 100644 --- a/drivers/net/usb/cx82310_eth.c +++ b/drivers/net/usb/cx82310_eth.c @@ -163,7 +163,8 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf) } if (!timeout) { dev_err(&udev->dev, "firmware not ready in time\n"); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto err; } /* enable ethernet mode (?) */ -- cgit v1.2.1 From f1472cb09f11ddb41d4be84f0650835cb65a9073 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Wed, 14 Aug 2019 13:56:43 -0500 Subject: net: kalmia: fix memory leaks In kalmia_init_and_get_ethernet_addr(), 'usb_buf' is allocated through kmalloc(). In the following execution, if the 'status' returned by kalmia_send_init_packet() is not 0, 'usb_buf' is not deallocated, leading to memory leaks. To fix this issue, add the 'out' label to free 'usb_buf'. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller --- drivers/net/usb/kalmia.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/kalmia.c b/drivers/net/usb/kalmia.c index d62b6706a537..fc5895f85cee 100644 --- a/drivers/net/usb/kalmia.c +++ b/drivers/net/usb/kalmia.c @@ -113,16 +113,16 @@ kalmia_init_and_get_ethernet_addr(struct usbnet *dev, u8 *ethernet_addr) status = kalmia_send_init_packet(dev, usb_buf, ARRAY_SIZE(init_msg_1), usb_buf, 24); if (status != 0) - return status; + goto out; memcpy(usb_buf, init_msg_2, 12); status = kalmia_send_init_packet(dev, usb_buf, ARRAY_SIZE(init_msg_2), usb_buf, 28); if (status != 0) - return status; + goto out; memcpy(ethernet_addr, usb_buf + 10, ETH_ALEN); - +out: kfree(usb_buf); return status; } -- cgit v1.2.1 From a46ecb116fb7f722fa8cb2da01959c36e4e10c41 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sat, 17 Aug 2019 17:04:47 -0400 Subject: bnxt_en: Fix VNIC clearing logic for 57500 chips. 
During device shutdown, the VNIC clearing sequence needs to be modified to free the VNIC first before freeing the RSS contexts. The current code is doing the reverse and we can get mis-directed RX completions to CP ring ID 0 when the RSS contexts are freed and zeroed. The clearing of RSS contexts is not required with the new sequence. Refactor the VNIC clearing logic into a new function bnxt_clear_vnic() and do the chip specific VNIC clearing sequence. Fixes: 7b3af4f75b81 ("bnxt_en: Add RSS support for 57500 chips.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7070349915bc..1ef224fbe302 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7016,19 +7016,29 @@ static void bnxt_hwrm_clear_vnic_rss(struct bnxt *bp) bnxt_hwrm_vnic_set_rss(bp, i, false); } -static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path, - bool irq_re_init) +static void bnxt_clear_vnic(struct bnxt *bp) { - if (bp->vnic_info) { - bnxt_hwrm_clear_vnic_filter(bp); + if (!bp->vnic_info) + return; + + bnxt_hwrm_clear_vnic_filter(bp); + if (!(bp->flags & BNXT_FLAG_CHIP_P5)) { /* clear all RSS setting before free vnic ctx */ bnxt_hwrm_clear_vnic_rss(bp); bnxt_hwrm_vnic_ctx_free(bp); - /* before free the vnic, undo the vnic tpa settings */ - if (bp->flags & BNXT_FLAG_TPA) - bnxt_set_tpa(bp, false); - bnxt_hwrm_vnic_free(bp); } + /* before free the vnic, undo the vnic tpa settings */ + if (bp->flags & BNXT_FLAG_TPA) + bnxt_set_tpa(bp, false); + bnxt_hwrm_vnic_free(bp); + if (bp->flags & BNXT_FLAG_CHIP_P5) + bnxt_hwrm_vnic_ctx_free(bp); +} + +static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path, + bool irq_re_init) +{ + bnxt_clear_vnic(bp); bnxt_hwrm_ring_free(bp, close_path); 
bnxt_hwrm_ring_grp_free(bp); if (irq_re_init) { -- cgit v1.2.1 From e8f267b063208372f7a329c6d5288d58944d873c Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sat, 17 Aug 2019 17:04:48 -0400 Subject: bnxt_en: Improve RX doorbell sequence. When both RX buffers and RX aggregation buffers have to be replenished at the end of NAPI, post the RX aggregation buffers first before RX buffers. Otherwise, we may run into a situation where there are only RX buffers without RX aggregation buffers for a split second. This will cause the hardware to abort the RX packet and report buffer errors, which will cause unnecessary cleanup by the driver. Ringing the Aggregation ring doorbell first before the RX ring doorbell will prevent some of these buffer errors. Use the same sequence during ring initialization as well. Fixes: 697197e5a173 ("bnxt_en: Re-structure doorbells.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1ef224fbe302..8dce4069472b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2021,9 +2021,9 @@ static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi) if (bnapi->events & BNXT_RX_EVENT) { struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; - bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); if (bnapi->events & BNXT_AGG_EVENT) bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); + bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); } bnapi->events = 0; } @@ -5064,6 +5064,7 @@ static void bnxt_set_db(struct bnxt *bp, struct bnxt_db_info *db, u32 ring_type, static int bnxt_hwrm_ring_alloc(struct bnxt *bp) { + bool agg_rings = !!(bp->flags & BNXT_FLAG_AGG_RINGS); int i, rc = 0; u32 type; @@ -5139,7 +5140,9 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp) if (rc) goto err_out; 
bnxt_set_db(bp, &rxr->rx_db, type, map_idx, ring->fw_ring_id); - bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); + /* If we have agg rings, post agg buffers first. */ + if (!agg_rings) + bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); bp->grp_info[map_idx].rx_fw_ring_id = ring->fw_ring_id; if (bp->flags & BNXT_FLAG_CHIP_P5) { struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; @@ -5158,7 +5161,7 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp) } } - if (bp->flags & BNXT_FLAG_AGG_RINGS) { + if (agg_rings) { type = HWRM_RING_ALLOC_AGG; for (i = 0; i < bp->rx_nr_rings; i++) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; @@ -5174,6 +5177,7 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp) bnxt_set_db(bp, &rxr->rx_agg_db, type, map_idx, ring->fw_ring_id); bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); + bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); bp->grp_info[grp_idx].agg_fw_ring_id = ring->fw_ring_id; } } -- cgit v1.2.1 From dd2ebf3404c7c295014bc025dea23960960ceb1a Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sat, 17 Aug 2019 17:04:49 -0400 Subject: bnxt_en: Fix handling FRAG_ERR when NVM_INSTALL_UPDATE cmd fails If FW returns FRAG_ERR in response error code, driver is resending the command only when HWRM command returns success. Fix the code to resend NVM_INSTALL_UPDATE command with DEFRAG install flags, if FW returns FRAG_ERR in its response error code. Fixes: cb4d1d626145 ("bnxt_en: Retry failed NVM_INSTALL_UPDATE with defragmentation flag enabled.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index c7ee63d69679..8445a0cce849 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2016,21 +2016,19 @@ static int bnxt_flash_package_from_file(struct net_device *dev, mutex_lock(&bp->hwrm_cmd_lock); hwrm_err = _hwrm_send_message(bp, &install, sizeof(install), INSTALL_PACKAGE_TIMEOUT); - if (hwrm_err) - goto flash_pkg_exit; - - if (resp->error_code) { + if (hwrm_err) { u8 error_code = ((struct hwrm_err_output *)resp)->cmd_err; - if (error_code == NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) { + if (resp->error_code && error_code == + NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) { install.flags |= cpu_to_le16( NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG); hwrm_err = _hwrm_send_message(bp, &install, sizeof(install), INSTALL_PACKAGE_TIMEOUT); - if (hwrm_err) - goto flash_pkg_exit; } + if (hwrm_err) + goto flash_pkg_exit; } if (resp->result) { -- cgit v1.2.1 From b703ba751dbb4bcd086509ed4b28102bc1670b35 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sat, 17 Aug 2019 17:04:50 -0400 Subject: bnxt_en: Suppress HWRM errors for HWRM_NVM_GET_VARIABLE command For newly added NVM parameters, older firmware may not have the support. Suppress the error message to avoid the unnecessary error message which is triggered when devlink calls the driver during initialization. Fixes: 782a624d00fa ("bnxt_en: Add bnxt_en initial params table and register it.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 549c90d3e465..c05d663212b2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -98,10 +98,13 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, if (idx) req->dimensions = cpu_to_le16(1); - if (req->req_type == cpu_to_le16(HWRM_NVM_SET_VARIABLE)) + if (req->req_type == cpu_to_le16(HWRM_NVM_SET_VARIABLE)) { memcpy(data_addr, buf, bytesize); - - rc = hwrm_send_message(bp, msg, msg_len, HWRM_CMD_TIMEOUT); + rc = hwrm_send_message(bp, msg, msg_len, HWRM_CMD_TIMEOUT); + } else { + rc = hwrm_send_message_silent(bp, msg, msg_len, + HWRM_CMD_TIMEOUT); + } if (!rc && req->req_type == cpu_to_le16(HWRM_NVM_GET_VARIABLE)) memcpy(buf, data_addr, bytesize); -- cgit v1.2.1 From 685ec6a81bb0d47faf1dba49437d5bdaede2733d Mon Sep 17 00:00:00 2001 From: Venkat Duvvuru Date: Sat, 17 Aug 2019 17:04:51 -0400 Subject: bnxt_en: Use correct src_fid to determine direction of the flow Direction of the flow is determined using src_fid. For an RX flow, src_fid is PF's fid and for TX flow, src_fid is VF's fid. Direction of the flow must be specified, when getting statistics for that flow. Currently, for DECAP flow, direction is determined incorrectly, i.e., direction is initialized as TX for DECAP flow, instead of RX. Because of which, stats are not reported for this DECAP flow, though it is offloaded and there is traffic for that flow, resulting in flow age out. This patch fixes the problem by determining the DECAP flow's direction using correct fid. Set the flow direction in all cases for consistency even if 64-bit flow handle is not used. 
Fixes: abd43a13525d ("bnxt_en: Support for 64-bit flow handle.") Signed-off-by: Venkat Duvvuru Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 6fe4a7174271..6224c30f8821 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -1285,9 +1285,7 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid, goto free_node; bnxt_tc_set_src_fid(bp, flow, src_fid); - - if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) - bnxt_tc_set_flow_dir(bp, flow, src_fid); + bnxt_tc_set_flow_dir(bp, flow, flow->src_fid); if (!bnxt_tc_can_offload(bp, flow)) { rc = -EOPNOTSUPP; -- cgit v1.2.1 From 9bf46566e80fd94845527d01ebd888eb49313551 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Sat, 17 Aug 2019 17:04:52 -0400 Subject: bnxt_en: Fix to include flow direction in L2 key FW expects the driver to provide unique flow reference handles for Tx or Rx flows. When a Tx flow and an Rx flow end up sharing a reference handle, flow offload does not seem to work. This could happen in the case of 2 flows having their L2 fields wildcarded but in different directions. Fix to incorporate the flow direction as part of the L2 key. v2: Move the dir field to the end of the bnxt_tc_l2_key struct to fix the warning reported by kbuild test robot. There is existing code that initializes the structure using nested initializer and will warn with the new u8 field added to the beginning. The structure also packs nicer when this new u8 is added to the end of the structure [MChan]. Fixes: abd43a13525d ("bnxt_en: Support for 64-bit flow handle.") Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 4 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 6224c30f8821..dd621f6bd127 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -1236,7 +1236,7 @@ static int __bnxt_tc_del_flow(struct bnxt *bp, static void bnxt_tc_set_flow_dir(struct bnxt *bp, struct bnxt_tc_flow *flow, u16 src_fid) { - flow->dir = (bp->pf.fw_fid == src_fid) ? BNXT_DIR_RX : BNXT_DIR_TX; + flow->l2_key.dir = (bp->pf.fw_fid == src_fid) ? BNXT_DIR_RX : BNXT_DIR_TX; } static void bnxt_tc_set_src_fid(struct bnxt *bp, struct bnxt_tc_flow *flow, @@ -1405,7 +1405,7 @@ static void bnxt_fill_cfa_stats_req(struct bnxt *bp, * 2. 15th bit of flow_handle must specify the flow * direction (TX/RX). */ - if (flow_node->flow.dir == BNXT_DIR_RX) + if (flow_node->flow.l2_key.dir == BNXT_DIR_RX) handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_DIR_RX | CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK; else diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h index ffec57d1a5ec..4f05305052f2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h @@ -23,6 +23,9 @@ struct bnxt_tc_l2_key { __be16 inner_vlan_tci; __be16 ether_type; u8 num_vlans; + u8 dir; +#define BNXT_DIR_RX 1 +#define BNXT_DIR_TX 0 }; struct bnxt_tc_l3_key { @@ -98,9 +101,6 @@ struct bnxt_tc_flow { /* flow applicable to pkts ingressing on this fid */ u16 src_fid; - u8 dir; -#define BNXT_DIR_RX 1 -#define BNXT_DIR_TX 0 struct bnxt_tc_l2_key l2_key; struct bnxt_tc_l2_key l2_mask; struct bnxt_tc_l3_key l3_key; -- cgit v1.2.1 From 80f0fe0934cd3daa13a5e4d48a103f469115b160 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Wed, 14 Aug 2019 14:57:05 -0500 Subject: ibmvnic: Unmap DMA address 
of TX descriptor buffers after use There's no need to wait until a completion is received to unmap TX descriptor buffers that have been passed to the hypervisor. Instead unmap it when the hypervisor call has completed. This patch avoids the possibility that a buffer will not be unmapped because a TX completion is lost or mishandled. Reported-by: Abdul Haleem Tested-by: Devesh K. Singh Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 3da680073265..cebd20f3128d 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1568,6 +1568,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num], (u64)tx_buff->indir_dma, (u64)num_entries); + dma_unmap_single(dev, tx_buff->indir_dma, + sizeof(tx_buff->indir_arr), DMA_TO_DEVICE); } else { tx_buff->num_entries = num_entries; lpar_rc = send_subcrq(adapter, handle_array[queue_num], @@ -2788,7 +2790,6 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, union sub_crq *next; int index; int i, j; - u8 *first; restart_loop: while (pending_scrq(adapter, scrq)) { @@ -2818,14 +2819,6 @@ restart_loop: txbuff->data_dma[j] = 0; } - /* if sub_crq was sent indirectly */ - first = &txbuff->indir_arr[0].generic.first; - if (*first == IBMVNIC_CRQ_CMD) { - dma_unmap_single(dev, txbuff->indir_dma, - sizeof(txbuff->indir_arr), - DMA_TO_DEVICE); - *first = 0; - } if (txbuff->last_frag) { dev_kfree_skb_any(txbuff->skb); -- cgit v1.2.1 From 3434341004a380f4e47c3a03d4320d43982162a0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 15 Aug 2019 12:49:49 -0700 Subject: net: cavium: fix driver name The driver name gets exposed in sysfs under /sys/bus/pci/drivers so it should look like other devices. 
Change it to be common format (instead of "Cavium PTP"). This is a trivial fix that was observed by accident because Debian kernels were building this driver into kernel (bug). Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/common/cavium_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/common/cavium_ptp.c b/drivers/net/ethernet/cavium/common/cavium_ptp.c index 73632b843749..b821c9e1604c 100644 --- a/drivers/net/ethernet/cavium/common/cavium_ptp.c +++ b/drivers/net/ethernet/cavium/common/cavium_ptp.c @@ -10,7 +10,7 @@ #include "cavium_ptp.h" -#define DRV_NAME "Cavium PTP Driver" +#define DRV_NAME "cavium_ptp" #define PCI_DEVICE_ID_CAVIUM_PTP 0xA00C #define PCI_DEVICE_ID_CAVIUM_RST 0xA00E -- cgit v1.2.1 From 44ef3a03252844a8753479b0cea7f29e4a804bdc Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Thu, 15 Aug 2019 15:29:51 -0500 Subject: wimax/i2400m: fix a memory leak bug In i2400m_barker_db_init(), 'options_orig' is allocated through kstrdup() to hold the original command line options. Then, the options are parsed. However, if an error occurs during the parsing process, 'options_orig' is not deallocated, leading to a memory leak bug. To fix this issue, free 'options_orig' before returning the error. Signed-off-by: Wenwen Wang Signed-off-by: David S. 
Miller --- drivers/net/wimax/i2400m/fw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c index e9fc168bb734..489cba9b284d 100644 --- a/drivers/net/wimax/i2400m/fw.c +++ b/drivers/net/wimax/i2400m/fw.c @@ -351,13 +351,15 @@ int i2400m_barker_db_init(const char *_options) } result = i2400m_barker_db_add(barker); if (result < 0) - goto error_add; + goto error_parse_add; } kfree(options_orig); } return 0; +error_parse_add: error_parse: + kfree(options_orig); error_add: kfree(i2400m_barker_db); return result; -- cgit v1.2.1 From ef01adae0e43cfb2468d0ea07137cc63cf31495c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Aug 2019 03:24:09 +0200 Subject: net: sched: use major priority number as hardware priority tc transparently maps the software priority number to hardware. Update it to pass the major priority which is what most drivers expect. Update drivers too so they do not need to lshift the priority field of the flow_cls_common_offload object. The stmmac driver is an exception, since this code assumes the tc software priority is fine, therefore, lshift it just to be conservative. Signed-off-by: Pablo Neira Ayuso Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c | 2 +- drivers/net/ethernet/mscc/ocelot_flower.c | 12 +++--------- drivers/net/ethernet/netronome/nfp/flower/qos_conf.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 2 +- include/net/pkt_cls.h | 2 +- 6 files changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index deeb65da99f3..00b2d4a86159 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3167,7 +3167,7 @@ mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr, esw_attr->parse_attr = parse_attr; esw_attr->chain = f->common.chain_index; - esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16; + esw_attr->prio = f->common.prio; esw_attr->in_rep = in_rep; esw_attr->in_mdev = in_mdev; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index e8ac90564dbe..84a87d059333 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -471,7 +471,7 @@ int mlxsw_sp_acl_rulei_commit(struct mlxsw_sp_acl_rule_info *rulei) void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei, unsigned int priority) { - rulei->priority = priority >> 16; + rulei->priority = priority; } void mlxsw_sp_acl_rulei_keymask_u32(struct mlxsw_sp_acl_rule_info *rulei, diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index 59487d446a09..b894bc0c9c16 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -13,12 +13,6 @@ struct ocelot_port_block { struct ocelot_port *port; }; -static u16 get_prio(u32 prio) -{ - /* prio starts from 0x1000 while the ids starts from 0 */ - return prio >> 16; -} - static int 
ocelot_flower_parse_action(struct flow_cls_offload *f, struct ocelot_ace_rule *rule) { @@ -168,7 +162,7 @@ static int ocelot_flower_parse(struct flow_cls_offload *f, } finished_key_parsing: - ocelot_rule->prio = get_prio(f->common.prio); + ocelot_rule->prio = f->common.prio; ocelot_rule->id = f->cookie; return ocelot_flower_parse_action(f, ocelot_rule); } @@ -218,7 +212,7 @@ static int ocelot_flower_destroy(struct flow_cls_offload *f, struct ocelot_ace_rule rule; int ret; - rule.prio = get_prio(f->common.prio); + rule.prio = f->common.prio; rule.port = port_block->port; rule.id = f->cookie; @@ -236,7 +230,7 @@ static int ocelot_flower_stats_update(struct flow_cls_offload *f, struct ocelot_ace_rule rule; int ret; - rule.prio = get_prio(f->common.prio); + rule.prio = f->common.prio; rule.port = port_block->port; rule.id = f->cookie; ret = ocelot_ace_rule_stats_update(&rule); diff --git a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c index 86e968cd5ffd..124a43dc136a 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c @@ -93,7 +93,7 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, return -EOPNOTSUPP; } - if (flow->common.prio != (1 << 16)) { + if (flow->common.prio != 1) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload requires highest priority"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 37c0bc699cd9..6c305b6ecad0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -94,7 +94,7 @@ static int tc_fill_entry(struct stmmac_priv *priv, struct stmmac_tc_entry *entry, *frag = NULL; struct tc_u32_sel *sel = cls->knode.sel; u32 off, data, mask, real_off, rem; - u32 prio = cls->common.prio; + u32 prio = cls->common.prio << 16; 
int ret; /* Only 1 match per entry */ diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index e429809ca90d..98be18ef1ed3 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -646,7 +646,7 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common, { cls_common->chain_index = tp->chain->index; cls_common->protocol = tp->protocol; - cls_common->prio = tp->prio; + cls_common->prio = tp->prio >> 16; if (tc_skip_sw(flags) || flags & TCA_CLS_FLAGS_VERBOSE) cls_common->extack = extack; } -- cgit v1.2.1 From 3bc158f8d0330f0ac58597c023acca2234c14616 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Aug 2019 03:24:10 +0200 Subject: netfilter: nf_tables: map basechain priority to hardware priority This patch adds initial support for offloading basechains using the priority range from 1 to 65535. This is restricting the netfilter priority range to 16-bit integer since this is what most drivers assume so far from tc. It should be possible to extend this range of supported priorities later on once drivers are updated to support for 32-bit integer priorities. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. 
Miller --- include/net/netfilter/nf_tables_offload.h | 2 ++ net/netfilter/nf_tables_api.c | 4 ++++ net/netfilter/nf_tables_offload.c | 17 ++++++++++++++--- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h index 3196663a10e3..c8b9dec376f5 100644 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@ -73,4 +73,6 @@ int nft_flow_rule_offload_commit(struct net *net); (__reg)->key = __key; \ memset(&(__reg)->mask, 0xff, (__reg)->len); +int nft_chain_offload_priority(struct nft_base_chain *basechain); + #endif diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 88abbddf8967..d47469f824a1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1667,6 +1667,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, chain->flags |= NFT_BASE_CHAIN | flags; basechain->policy = NF_ACCEPT; + if (chain->flags & NFT_CHAIN_HW_OFFLOAD && + nft_chain_offload_priority(basechain) < 0) + return -EOPNOTSUPP; + flow_block_init(&basechain->flow_block); } else { chain = kzalloc(sizeof(*chain), GFP_KERNEL); diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 64f5fd5f240e..c0d18c1d77ac 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -103,10 +103,11 @@ void nft_offload_update_dependency(struct nft_offload_ctx *ctx, } static void nft_flow_offload_common_init(struct flow_cls_common_offload *common, - __be16 proto, - struct netlink_ext_ack *extack) + __be16 proto, int priority, + struct netlink_ext_ack *extack) { common->protocol = proto; + common->prio = priority; common->extack = extack; } @@ -124,6 +125,15 @@ static int nft_setup_cb_call(struct nft_base_chain *basechain, return 0; } +int nft_chain_offload_priority(struct nft_base_chain *basechain) +{ + if (basechain->ops.priority <= 0 || + 
basechain->ops.priority > USHRT_MAX) + return -1; + + return 0; +} + static int nft_flow_offload_rule(struct nft_trans *trans, enum flow_cls_command command) { @@ -142,7 +152,8 @@ static int nft_flow_offload_rule(struct nft_trans *trans, if (flow) proto = flow->proto; - nft_flow_offload_common_init(&cls_flow.common, proto, &extack); + nft_flow_offload_common_init(&cls_flow.common, proto, + basechain->ops.priority, &extack); cls_flow.command = command; cls_flow.cookie = (unsigned long) rule; if (flow) -- cgit v1.2.1 From cfef46d692efd852a0da6803f920cc756eea2855 Mon Sep 17 00:00:00 2001 From: Tho Vu Date: Fri, 16 Aug 2019 17:17:02 +0200 Subject: ravb: Fix use-after-free ravb_tstamp_skb When a Tx timestamp is requested, a pointer to the skb is stored in the ravb_tstamp_skb struct. This was done without an skb_get. There exists the possibility that the skb could be freed by ravb_tx_free (when ravb_tx_free is called from ravb_start_xmit) before the timestamp was processed, leading to a use-after-free bug. Use skb_get when filling a ravb_tstamp_skb struct, and add appropriate frees/consumes when a ravb_tstamp_skb struct is freed. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Signed-off-by: Tho Vu Signed-off-by: Kazuya Mizuguchi Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index ef8f08931fe8..6cacd5e893ac 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Renesas Ethernet AVB device driver * - * Copyright (C) 2014-2015 Renesas Electronics Corporation + * Copyright (C) 2014-2019 Renesas Electronics Corporation * Copyright (C) 2015 Renesas Solutions Corp. * Copyright (C) 2015-2016 Cogent Embedded, Inc. 
* @@ -513,7 +513,10 @@ static void ravb_get_tx_tstamp(struct net_device *ndev) kfree(ts_skb); if (tag == tfa_tag) { skb_tstamp_tx(skb, &shhwtstamps); + dev_consume_skb_any(skb); break; + } else { + dev_kfree_skb_any(skb); } } ravb_modify(ndev, TCCR, TCCR_TFR, TCCR_TFR); @@ -1564,7 +1567,7 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev) } goto unmap; } - ts_skb->skb = skb; + ts_skb->skb = skb_get(skb); ts_skb->tag = priv->ts_skb_tag++; priv->ts_skb_tag &= 0x3ff; list_add_tail(&ts_skb->list, &priv->ts_skb_list); @@ -1693,6 +1696,7 @@ static int ravb_close(struct net_device *ndev) /* Clear the timestamp list */ list_for_each_entry_safe(ts_skb, ts_skb2, &priv->ts_skb_list, list) { list_del(&ts_skb->list); + kfree_skb(ts_skb->skb); kfree(ts_skb); } -- cgit v1.2.1 From 165d42c012be69900f0e2f8545626cb9e7d4a832 Mon Sep 17 00:00:00 2001 From: Nishka Dasgupta Date: Sat, 6 Jul 2019 19:00:21 +0530 Subject: drm/mediatek: mtk_drm_drv.c: Add of_node_put() before goto Each iteration of for_each_child_of_node puts the previous node, but in the case of a goto from the middle of the loop, there is no put, thus causing a memory leak. Hence add an of_node_put before the goto in two places. Issue found with Coccinelle. 
Fixes: 119f5173628a (drm/mediatek: Add DRM Driver for Mediatek SoC MT8173) Signed-off-by: Nishka Dasgupta Signed-off-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index c021d4c8324f..7f5408cb2377 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -567,12 +567,15 @@ static int mtk_drm_probe(struct platform_device *pdev) comp = devm_kzalloc(dev, sizeof(*comp), GFP_KERNEL); if (!comp) { ret = -ENOMEM; + of_node_put(node); goto err_node; } ret = mtk_ddp_comp_init(dev, node, comp, comp_id, NULL); - if (ret) + if (ret) { + of_node_put(node); goto err_node; + } private->ddp_comp[comp_id] = comp; } -- cgit v1.2.1 From dec43da46f63eb71f519d963ba6832838e4262a3 Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Thu, 25 Jul 2019 10:48:45 +0800 Subject: fpga: altera-ps-spi: Fix getting of optional confd gpio Currently the driver does not handle EPROBE_DEFER for the confd gpio. Use devm_gpiod_get_optional() instead of devm_gpiod_get() and return error codes from altera_ps_probe(). 
Fixes: 5692fae0742d ("fpga manager: Add altera-ps-spi driver for Altera FPGAs") Signed-off-by: Phil Reid Signed-off-by: Moritz Fischer --- drivers/fpga/altera-ps-spi.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/fpga/altera-ps-spi.c b/drivers/fpga/altera-ps-spi.c index a13f224303c6..0221dee8dd4c 100644 --- a/drivers/fpga/altera-ps-spi.c +++ b/drivers/fpga/altera-ps-spi.c @@ -210,7 +210,7 @@ static int altera_ps_write_complete(struct fpga_manager *mgr, return -EIO; } - if (!IS_ERR(conf->confd)) { + if (conf->confd) { if (!gpiod_get_raw_value_cansleep(conf->confd)) { dev_err(&mgr->dev, "CONF_DONE is inactive!\n"); return -EIO; @@ -289,10 +289,13 @@ static int altera_ps_probe(struct spi_device *spi) return PTR_ERR(conf->status); } - conf->confd = devm_gpiod_get(&spi->dev, "confd", GPIOD_IN); + conf->confd = devm_gpiod_get_optional(&spi->dev, "confd", GPIOD_IN); if (IS_ERR(conf->confd)) { - dev_warn(&spi->dev, "Not using confd gpio: %ld\n", - PTR_ERR(conf->confd)); + dev_err(&spi->dev, "Failed to get confd gpio: %ld\n", + PTR_ERR(conf->confd)); + return PTR_ERR(conf->confd); + } else if (!conf->confd) { + dev_warn(&spi->dev, "Not using confd gpio"); } /* Register manager with unique name */ -- cgit v1.2.1 From 5d888b481e6abc726b36c86f0bf13af1583bb336 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 14 Aug 2019 17:38:09 -0700 Subject: xfs: fix reflink source file racing with directio writes While trawling through the dedupe file comparison code trying to fix page deadlocking problems, Dave Chinner noticed that the reflink code only takes shared IOLOCK/MMAPLOCKs on the source file. Because page_mkwrite and directio writes do not take the EXCL versions of those locks, this means that reflink can race with writer processes. For pure remapping this can lead to undefined behavior and file corruption; for dedupe this means that we cannot be sure that the contents are identical when we decide to go ahead with the remapping. 
Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_reflink.c | 63 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index c4ec7afd1170..edbe37b7f636 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1190,11 +1190,11 @@ xfs_reflink_remap_blocks( } /* - * Grab the exclusive iolock for a data copy from src to dest, making - * sure to abide vfs locking order (lowest pointer value goes first) and - * breaking the pnfs layout leases on dest before proceeding. The loop - * is needed because we cannot call the blocking break_layout() with the - * src iolock held, and therefore have to back out both locks. + * Grab the exclusive iolock for a data copy from src to dest, making sure to + * abide vfs locking order (lowest pointer value goes first) and breaking the + * layout leases before proceeding. The loop is needed because we cannot call + * the blocking break_layout() with the iolocks held, and therefore have to + * back out both locks. */ static int xfs_iolock_two_inodes_and_break_layout( @@ -1203,33 +1203,44 @@ xfs_iolock_two_inodes_and_break_layout( { int error; -retry: - if (src < dest) { - inode_lock_shared(src); - inode_lock_nested(dest, I_MUTEX_NONDIR2); - } else { - /* src >= dest */ - inode_lock(dest); - } + if (src > dest) + swap(src, dest); - error = break_layout(dest, false); - if (error == -EWOULDBLOCK) { - inode_unlock(dest); - if (src < dest) - inode_unlock_shared(src); +retry: + /* Wait to break both inodes' layouts before we start locking. */ + error = break_layout(src, true); + if (error) + return error; + if (src != dest) { error = break_layout(dest, true); if (error) return error; - goto retry; } + + /* Lock one inode and make sure nobody got in and leased it. 
*/ + inode_lock(src); + error = break_layout(src, false); if (error) { + inode_unlock(src); + if (error == -EWOULDBLOCK) + goto retry; + return error; + } + + if (src == dest) + return 0; + + /* Lock the other inode and make sure nobody got in and leased it. */ + inode_lock_nested(dest, I_MUTEX_NONDIR2); + error = break_layout(dest, false); + if (error) { + inode_unlock(src); inode_unlock(dest); - if (src < dest) - inode_unlock_shared(src); + if (error == -EWOULDBLOCK) + goto retry; return error; } - if (src > dest) - inode_lock_shared_nested(src, I_MUTEX_NONDIR2); + return 0; } @@ -1247,10 +1258,10 @@ xfs_reflink_remap_unlock( xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); if (!same_inode) - xfs_iunlock(src, XFS_MMAPLOCK_SHARED); + xfs_iunlock(src, XFS_MMAPLOCK_EXCL); inode_unlock(inode_out); if (!same_inode) - inode_unlock_shared(inode_in); + inode_unlock(inode_in); } /* @@ -1325,7 +1336,7 @@ xfs_reflink_remap_prep( if (same_inode) xfs_ilock(src, XFS_MMAPLOCK_EXCL); else - xfs_lock_two_inodes(src, XFS_MMAPLOCK_SHARED, dest, + xfs_lock_two_inodes(src, XFS_MMAPLOCK_EXCL, dest, XFS_MMAPLOCK_EXCL); /* Check file eligibility and prepare for block sharing. */ -- cgit v1.2.1 From 0d7342c3637462fc6291b392ced9af0f4ca4dab4 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Wed, 14 Aug 2019 15:35:02 +0300 Subject: MAINTAINERS: Remove IP MASQUERADING record This entry is in MAINTAINERS for historical purpose. It doesn't match current sources since the commit adf82accc5f5 ("netfilter: x_tables: merge ip and ipv6 masquerade modules") moved the module. The net/netfilter/xt_MASQUERADE.c module is already under the netfilter section. Thus, there is no purpose to keep this separate entry in MAINTAINERS. 
Cc: Florian Westphal Cc: Juanjo Ciarlante Cc: netfilter-devel@vger.kernel.org Suggested-by: Pablo Neira Ayuso Signed-off-by: Denis Efremov Signed-off-by: Pablo Neira Ayuso --- MAINTAINERS | 5 ----- 1 file changed, 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index a416574780d6..6839cfd91dde 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8439,11 +8439,6 @@ S: Maintained F: fs/io_uring.c F: include/uapi/linux/io_uring.h -IP MASQUERADING -M: Juanjo Ciarlante -S: Maintained -F: net/ipv4/netfilter/ipt_MASQUERADE.c - IPMI SUBSYSTEM M: Corey Minyard L: openipmi-developer@lists.sourceforge.net (moderated for non-subscribers) -- cgit v1.2.1 From f20faa06d83de440bec8e200870784c3458793c4 Mon Sep 17 00:00:00 2001 From: Todd Seidelmann Date: Wed, 14 Aug 2019 10:54:16 -0400 Subject: netfilter: ebtables: Fix argument order to ADD_COUNTER The ordering of arguments to the x_tables ADD_COUNTER macro appears to be wrong in ebtables (cf. ip_tables.c, ip6_tables.c, and arp_tables.c). This causes data corruption in the ebtables userspace tools because they get incorrect packet & byte counts from the kernel. 
Fixes: d72133e628803 ("netfilter: ebtables: use ADD_COUNTER macro") Signed-off-by: Todd Seidelmann Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index c8177a89f52c..4096d8a74a2b 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -221,7 +221,7 @@ unsigned int ebt_do_table(struct sk_buff *skb, return NF_DROP; } - ADD_COUNTER(*(counter_base + i), 1, skb->len); + ADD_COUNTER(*(counter_base + i), skb->len, 1); /* these should only watch: not modify, nor tell us * what to do with the packet @@ -959,8 +959,8 @@ static void get_counters(const struct ebt_counter *oldcounters, continue; counter_base = COUNTER_BASE(oldcounters, nentries, cpu); for (i = 0; i < nentries; i++) - ADD_COUNTER(counters[i], counter_base[i].pcnt, - counter_base[i].bcnt); + ADD_COUNTER(counters[i], counter_base[i].bcnt, + counter_base[i].pcnt); } } @@ -1280,7 +1280,7 @@ static int do_update_counters(struct net *net, const char *name, /* we add to the counters of the first cpu */ for (i = 0; i < num_counters; i++) - ADD_COUNTER(t->private->counters[i], tmp[i].pcnt, tmp[i].bcnt); + ADD_COUNTER(t->private->counters[i], tmp[i].bcnt, tmp[i].pcnt); write_unlock_bh(&t->lock); ret = 0; -- cgit v1.2.1 From 14c415862c0630e01712a4eeaf6159a2b1b6d2a4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Aug 2019 11:23:58 +0200 Subject: netfilter: nft_flow_offload: missing netlink attribute policy The netlink attribute policy for NFTA_FLOW_TABLE_NAME is missing. 
Fixes: a3c90f7a2323 ("netfilter: nf_tables: flow offload expression") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_flow_offload.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 060a4ed46d5e..01705ad74a9a 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -149,6 +149,11 @@ static int nft_flow_offload_validate(const struct nft_ctx *ctx, return nft_chain_validate_hooks(ctx->chain, hook_mask); } +static const struct nla_policy nft_flow_offload_policy[NFTA_FLOW_MAX + 1] = { + [NFTA_FLOW_TABLE_NAME] = { .type = NLA_STRING, + .len = NFT_NAME_MAXLEN - 1 }, +}; + static int nft_flow_offload_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -207,6 +212,7 @@ static const struct nft_expr_ops nft_flow_offload_ops = { static struct nft_expr_type nft_flow_offload_type __read_mostly = { .name = "flow_offload", .ops = &nft_flow_offload_ops, + .policy = nft_flow_offload_policy, .maxattr = NFTA_FLOW_MAX, .owner = THIS_MODULE, }; -- cgit v1.2.1 From 89a26cd4b501e9511d3cd3d22327fc76a75a38b3 Mon Sep 17 00:00:00 2001 From: Juliana Rodrigueiro Date: Fri, 16 Aug 2019 17:02:22 +0200 Subject: netfilter: xt_nfacct: Fix alignment mismatch in xt_nfacct_match_info When running a 64-bit kernel with a 32-bit iptables binary, the size of the xt_nfacct_match_info struct diverges. kernel: sizeof(struct xt_nfacct_match_info) : 40 iptables: sizeof(struct xt_nfacct_match_info)) : 36 Trying to append nfacct related rules results in an unhelpful message. Although it is suggested to look for more information in dmesg, nothing can be found there. # iptables -A -m nfacct --nfacct-name iptables: Invalid argument. Run `dmesg' for more information. This patch fixes the memory misalignment by enforcing 8-byte alignment within the struct's first revision. This solution is often used in many other uapi netfilter headers. 
Signed-off-by: Juliana Rodrigueiro Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_nfacct.h | 5 +++++ net/netfilter/xt_nfacct.c | 36 ++++++++++++++++++++++---------- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/include/uapi/linux/netfilter/xt_nfacct.h b/include/uapi/linux/netfilter/xt_nfacct.h index 5c8a4d760ee3..b5123ab8d54a 100644 --- a/include/uapi/linux/netfilter/xt_nfacct.h +++ b/include/uapi/linux/netfilter/xt_nfacct.h @@ -11,4 +11,9 @@ struct xt_nfacct_match_info { struct nf_acct *nfacct; }; +struct xt_nfacct_match_info_v1 { + char name[NFACCT_NAME_MAX]; + struct nf_acct *nfacct __attribute__((aligned(8))); +}; + #endif /* _XT_NFACCT_MATCH_H */ diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index d0ab1adf5bff..5aab6df74e0f 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -54,25 +54,39 @@ nfacct_mt_destroy(const struct xt_mtdtor_param *par) nfnl_acct_put(info->nfacct); } -static struct xt_match nfacct_mt_reg __read_mostly = { - .name = "nfacct", - .family = NFPROTO_UNSPEC, - .checkentry = nfacct_mt_checkentry, - .match = nfacct_mt, - .destroy = nfacct_mt_destroy, - .matchsize = sizeof(struct xt_nfacct_match_info), - .usersize = offsetof(struct xt_nfacct_match_info, nfacct), - .me = THIS_MODULE, +static struct xt_match nfacct_mt_reg[] __read_mostly = { + { + .name = "nfacct", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = nfacct_mt_checkentry, + .match = nfacct_mt, + .destroy = nfacct_mt_destroy, + .matchsize = sizeof(struct xt_nfacct_match_info), + .usersize = offsetof(struct xt_nfacct_match_info, nfacct), + .me = THIS_MODULE, + }, + { + .name = "nfacct", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = nfacct_mt_checkentry, + .match = nfacct_mt, + .destroy = nfacct_mt_destroy, + .matchsize = sizeof(struct xt_nfacct_match_info_v1), + .usersize = offsetof(struct xt_nfacct_match_info_v1, nfacct), + .me = THIS_MODULE, + }, }; 
static int __init nfacct_mt_init(void) { - return xt_register_match(&nfacct_mt_reg); + return xt_register_matches(nfacct_mt_reg, ARRAY_SIZE(nfacct_mt_reg)); } static void __exit nfacct_mt_exit(void) { - xt_unregister_match(&nfacct_mt_reg); + xt_unregister_matches(nfacct_mt_reg, ARRAY_SIZE(nfacct_mt_reg)); } module_init(nfacct_mt_init); -- cgit v1.2.1 From b0fdc01354f45d43f082025636ef808968a27b36 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 16 Aug 2019 18:06:26 +0200 Subject: sched/core: Schedule new worker even if PI-blocked If a task is PI-blocked (blocking on sleeping spinlock) then we don't want to schedule a new kworker if we schedule out due to lock contention because !RT does not do that as well. A spinning spinlock disables preemption and a worker does not schedule out on lock contention (but spin). On RT the RW-semaphore implementation uses an rtmutex so tsk_is_pi_blocked() will return true if a task blocks on it. In this case we will now start a new worker which may deadlock if one worker is waiting on progress from another worker. Since a RW-semaphore starts a new worker on !RT, we should do the same on RT. XFS is able to trigger this deadlock. Allow to schedule new worker if the current worker is PI-blocked. 
Signed-off-by: Sebastian Andrzej Siewior Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20190816160626.12742-1-bigeasy@linutronix.de Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2b037f195473..010d578118d6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3904,7 +3904,7 @@ void __noreturn do_task_dead(void) static inline void sched_submit_work(struct task_struct *tsk) { - if (!tsk->state || tsk_is_pi_blocked(tsk)) + if (!tsk->state) return; /* @@ -3920,6 +3920,9 @@ static inline void sched_submit_work(struct task_struct *tsk) preempt_enable_no_resched(); } + if (tsk_is_pi_blocked(tsk)) + return; + /* * If we are going to sleep and we have plugged IO queued, * make sure to submit it to avoid deadlocks. -- cgit v1.2.1 From 77d760328ee015cf89460c52bfd5a6b0a09b7472 Mon Sep 17 00:00:00 2001 From: Su Yanjun Date: Fri, 16 Aug 2019 16:43:21 +0800 Subject: perf/x86: Fix typo in comment No functional change. Signed-off-by: Su Yanjun Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1565945001-4413-1-git-send-email-suyj.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 81b005e4c7d9..325959d19d9a 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1236,7 +1236,7 @@ void x86_pmu_enable_event(struct perf_event *event) * Add a single event to the PMU. * * The event is added to the group of enabled events - * but only if it can be scehduled with existing events. + * but only if it can be scheduled with existing events. 
*/ static int x86_pmu_add(struct perf_event *event, int flags) { -- cgit v1.2.1 From f1c6ece23729257fb46562ff9224cf5f61b818da Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Mon, 12 Aug 2019 20:43:02 +0200 Subject: kprobes: Fix potential deadlock in kprobe_optimizer() lockdep reports the following deadlock scenario: WARNING: possible circular locking dependency detected kworker/1:1/48 is trying to acquire lock: 000000008d7a62b2 (text_mutex){+.+.}, at: kprobe_optimizer+0x163/0x290 but task is already holding lock: 00000000850b5e2d (module_mutex){+.+.}, at: kprobe_optimizer+0x31/0x290 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (module_mutex){+.+.}: __mutex_lock+0xac/0x9f0 mutex_lock_nested+0x1b/0x20 set_all_modules_text_rw+0x22/0x90 ftrace_arch_code_modify_prepare+0x1c/0x20 ftrace_run_update_code+0xe/0x30 ftrace_startup_enable+0x2e/0x50 ftrace_startup+0xa7/0x100 register_ftrace_function+0x27/0x70 arm_kprobe+0xb3/0x130 enable_kprobe+0x83/0xa0 enable_trace_kprobe.part.0+0x2e/0x80 kprobe_register+0x6f/0xc0 perf_trace_event_init+0x16b/0x270 perf_kprobe_init+0xa7/0xe0 perf_kprobe_event_init+0x3e/0x70 perf_try_init_event+0x4a/0x140 perf_event_alloc+0x93a/0xde0 __do_sys_perf_event_open+0x19f/0xf30 __x64_sys_perf_event_open+0x20/0x30 do_syscall_64+0x65/0x1d0 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #0 (text_mutex){+.+.}: __lock_acquire+0xfcb/0x1b60 lock_acquire+0xca/0x1d0 __mutex_lock+0xac/0x9f0 mutex_lock_nested+0x1b/0x20 kprobe_optimizer+0x163/0x290 process_one_work+0x22b/0x560 worker_thread+0x50/0x3c0 kthread+0x112/0x150 ret_from_fork+0x3a/0x50 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(module_mutex); lock(text_mutex); lock(module_mutex); lock(text_mutex); *** DEADLOCK *** As a reproducer I've been using bcc's funccount.py (https://github.com/iovisor/bcc/blob/master/tools/funccount.py), for example: # ./funccount.py '*interrupt*' That immediately 
triggers the lockdep splat. Fix by acquiring text_mutex before module_mutex in kprobe_optimizer(). Signed-off-by: Andrea Righi Acked-by: Masami Hiramatsu Cc: Anil S Keshavamurthy Cc: David S. Miller Cc: Linus Torvalds Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: d5b844a2cf50 ("ftrace/x86: Remove possible deadlock between register_kprobe() and ftrace_run_update_code()") Link: http://lkml.kernel.org/r/20190812184302.GA7010@xps-13 Signed-off-by: Ingo Molnar --- kernel/kprobes.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9873fc627d61..d9770a5393c8 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -470,6 +470,7 @@ static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer); */ static void do_optimize_kprobes(void) { + lockdep_assert_held(&text_mutex); /* * The optimization/unoptimization refers online_cpus via * stop_machine() and cpu-hotplug modifies online_cpus. @@ -487,9 +488,7 @@ static void do_optimize_kprobes(void) list_empty(&optimizing_list)) return; - mutex_lock(&text_mutex); arch_optimize_kprobes(&optimizing_list); - mutex_unlock(&text_mutex); } /* @@ -500,6 +499,7 @@ static void do_unoptimize_kprobes(void) { struct optimized_kprobe *op, *tmp; + lockdep_assert_held(&text_mutex); /* See comment in do_optimize_kprobes() */ lockdep_assert_cpus_held(); @@ -507,7 +507,6 @@ static void do_unoptimize_kprobes(void) if (list_empty(&unoptimizing_list)) return; - mutex_lock(&text_mutex); arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list); /* Loop free_list for disarming */ list_for_each_entry_safe(op, tmp, &freeing_list, list) { @@ -524,7 +523,6 @@ static void do_unoptimize_kprobes(void) } else list_del_init(&op->list); } - mutex_unlock(&text_mutex); } /* Reclaim all kprobes on the free_list */ @@ -556,6 +554,7 @@ static void kprobe_optimizer(struct work_struct *work) { mutex_lock(&kprobe_mutex); cpus_read_lock(); + mutex_lock(&text_mutex); /* Lock modules while 
optimizing kprobes */ mutex_lock(&module_mutex); @@ -583,6 +582,7 @@ static void kprobe_optimizer(struct work_struct *work) do_free_cleaned_kprobes(); mutex_unlock(&module_mutex); + mutex_unlock(&text_mutex); cpus_read_unlock(); mutex_unlock(&kprobe_mutex); -- cgit v1.2.1 From f897e60a12f0b9146357780d317879bce2a877dc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 9 Aug 2019 14:54:07 +0200 Subject: x86/apic: Handle missing global clockevent gracefully Some newer machines do not advertise legacy timers. The kernel can handle that situation if the TSC and the CPU frequency are enumerated by CPUID or MSRs and the CPU supports TSC deadline timer. If the CPU does not support TSC deadline timer the local APIC timer frequency has to be known as well. Some Ryzen machines do not advertise legacy timers, but there is no reliable way to determine the bus frequency which feeds the local APIC timer when the machine allows overclocking of that frequency. As there is no legacy timer the local APIC timer calibration crashes due to a NULL pointer dereference when accessing the not installed global clock event device. Switch the calibration loop to a non interrupt based one, which polls either TSC (if frequency is known) or jiffies. The latter requires a global clockevent. As the machines which do not have a global clockevent installed have a known TSC frequency this is a non issue. For older machines where TSC frequency is not known, there is no known case where the legacy timers do not exist as that would have been reported long ago.
Reported-by: Daniel Drake Reported-by: Jiri Slaby Signed-off-by: Thomas Gleixner Tested-by: Daniel Drake Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1908091443030.21433@nanos.tec.linutronix.de Link: http://bugzilla.opensuse.org/show_bug.cgi?id=1142926#c12 --- arch/x86/kernel/apic/apic.c | 68 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index f5291362da1a..aa5495d0f478 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -722,7 +722,7 @@ static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; /* - * Temporary interrupt handler. + * Temporary interrupt handler and polled calibration function. */ static void __init lapic_cal_handler(struct clock_event_device *dev) { @@ -851,7 +851,8 @@ bool __init apic_needs_pit(void) static int __init calibrate_APIC_clock(void) { struct clock_event_device *levt = this_cpu_ptr(&lapic_events); - void (*real_handler)(struct clock_event_device *dev); + u64 tsc_perj = 0, tsc_start = 0; + unsigned long jif_start; unsigned long deltaj; long delta, deltatsc; int pm_referenced = 0; @@ -878,28 +879,64 @@ static int __init calibrate_APIC_clock(void) apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" "calibrating APIC timer ...\n"); + /* + * There are platforms w/o global clockevent devices. Instead of + * making the calibration conditional on that, use a polling based + * approach everywhere. + */ local_irq_disable(); - /* Replace the global interrupt handler */ - real_handler = global_clock_event->event_handler; - global_clock_event->event_handler = lapic_cal_handler; - /* * Setup the APIC counter to maximum. 
There is no way the lapic * can underflow in the 100ms detection time frame */ __setup_APIC_LVTT(0xffffffff, 0, 0); - /* Let the interrupts run */ + /* + * Methods to terminate the calibration loop: + * 1) Global clockevent if available (jiffies) + * 2) TSC if available and frequency is known + */ + jif_start = READ_ONCE(jiffies); + + if (tsc_khz) { + tsc_start = rdtsc(); + tsc_perj = div_u64((u64)tsc_khz * 1000, HZ); + } + + /* + * Enable interrupts so the tick can fire, if a global + * clockevent device is available + */ local_irq_enable(); - while (lapic_cal_loops <= LAPIC_CAL_LOOPS) - cpu_relax(); + while (lapic_cal_loops <= LAPIC_CAL_LOOPS) { + /* Wait for a tick to elapse */ + while (1) { + if (tsc_khz) { + u64 tsc_now = rdtsc(); + if ((tsc_now - tsc_start) >= tsc_perj) { + tsc_start += tsc_perj; + break; + } + } else { + unsigned long jif_now = READ_ONCE(jiffies); - local_irq_disable(); + if (time_after(jif_now, jif_start)) { + jif_start = jif_now; + break; + } + } + cpu_relax(); + } - /* Restore the real event handler */ - global_clock_event->event_handler = real_handler; + /* Invoke the calibration routine */ + local_irq_disable(); + lapic_cal_handler(NULL); + local_irq_enable(); + } + + local_irq_disable(); /* Build delta t1-t2 as apic timer counts down */ delta = lapic_cal_t1 - lapic_cal_t2; @@ -943,10 +980,11 @@ static int __init calibrate_APIC_clock(void) levt->features &= ~CLOCK_EVT_FEAT_DUMMY; /* - * PM timer calibration failed or not turned on - * so lets try APIC timer based calibration + * PM timer calibration failed or not turned on so lets try APIC + * timer based calibration, if a global clockevent device is + * available. */ - if (!pm_referenced) { + if (!pm_referenced && global_clock_event) { apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); /* -- cgit v1.2.1 From 33da8e7c814f77310250bb54a9db36a44c5de784 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Fri, 16 Aug 2019 12:33:54 -0500 Subject: signal: Allow cifs and drbd to receive their terminating signals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My recent change to only use force_sig for synchronous events wound up breaking signal reception in cifs and drbd. I had overlooked the fact that by default kthreads start out with all signals set to SIG_IGN. So a change I thought was safe turned out to have made it impossible for those kernel threads to catch their signals. Reverting the work on force_sig is a bad idea because what the code was doing was very much a misuse of force_sig. The way force_sig ultimately allowed the signal to happen was to change the signal handler to SIG_DFL, which after the first signal will allow userspace to send signals to these kernel threads. At least for wake_ack_receiver in drbd that does not appear actively wrong. So correct this problem by adding allow_kernel_signal that will allow signals whose siginfo reports they were sent by the kernel through, but will not allow userspace generated signals, and update cifs and drbd to call allow_kernel_signal in an appropriate place so that their threads can receive these signals. Fixing things this way ensures that userspace won't be able to send signals and cause problems, that it is clear which signals the threads are expecting to receive, and it guarantees that nothing else in the system will be affected. This change was partly inspired by similar cifs and drbd patches that added allow_signal.
Reported-by: ronnie sahlberg Reported-by: Christoph Böhmwalder Tested-by: Christoph Böhmwalder Cc: Steve French Cc: Philipp Reisner Cc: David Laight Fixes: 247bc9470b1e ("cifs: fix rmmod regression in cifs.ko caused by force_sig changes") Fixes: 72abe3bcf091 ("signal/cifs: Fix cifs_put_tcp_session to call send_sig instead of force_sig") Fixes: fee109901f39 ("signal/drbd: Use send_sig not force_sig") Fixes: 3cf5d076fb4d ("signal: Remove task parameter from force_sig") Signed-off-by: "Eric W. Biederman" --- drivers/block/drbd/drbd_main.c | 2 ++ fs/cifs/connect.c | 2 +- include/linux/signal.h | 15 ++++++++++++++- kernel/signal.c | 5 +++++ 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 9bd4ddd12b25..5b248763a672 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -322,6 +322,8 @@ static int drbd_thread_setup(void *arg) thi->name[0], resource->name); + allow_kernel_signal(DRBD_SIGKILL); + allow_kernel_signal(SIGXCPU); restart: retval = thi->function(thi); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a15a6e738eb5..1795e80cbdf7 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1113,7 +1113,7 @@ cifs_demultiplex_thread(void *p) mempool_resize(cifs_req_poolp, length + cifs_min_rcv); set_freezable(); - allow_signal(SIGKILL); + allow_kernel_signal(SIGKILL); while (server->tcpStatus != CifsExiting) { if (try_to_freeze()) continue; diff --git a/include/linux/signal.h b/include/linux/signal.h index b5d99482d3fe..1a5f88316b08 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -282,6 +282,9 @@ extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping); extern void exit_signals(struct task_struct *tsk); extern void kernel_sigaction(int, __sighandler_t); +#define SIG_KTHREAD ((__force __sighandler_t)2) +#define SIG_KTHREAD_KERNEL ((__force __sighandler_t)3) + static inline void allow_signal(int sig) { /* @@ 
-289,7 +292,17 @@ static inline void allow_signal(int sig) * know it'll be handled, so that they don't get converted to * SIGKILL or just silently dropped. */ - kernel_sigaction(sig, (__force __sighandler_t)2); + kernel_sigaction(sig, SIG_KTHREAD); +} + +static inline void allow_kernel_signal(int sig) +{ + /* + * Kernel threads handle their own signals. Let the signal code + * know signals sent by the kernel will be handled, so that they + * don't get silently dropped. + */ + kernel_sigaction(sig, SIG_KTHREAD_KERNEL); } static inline void disallow_signal(int sig) diff --git a/kernel/signal.c b/kernel/signal.c index e667be6907d7..534fec266a33 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -90,6 +90,11 @@ static bool sig_task_ignored(struct task_struct *t, int sig, bool force) handler == SIG_DFL && !(force && sig_kernel_only(sig))) return true; + /* Only allow kernel generated signals to this kthread */ + if (unlikely((t->flags & PF_KTHREAD) && + (handler == SIG_KTHREAD_KERNEL) && !force)) + return true; + return sig_handler_ignored(handler, sig); } -- cgit v1.2.1 From 38a429c898ddd210cc35463b096389f97c3c5a73 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 19 Aug 2019 16:39:27 +0900 Subject: netfilter: add include guard to nf_conntrack_h323_types.h Add a header include guard just in case. 
Signed-off-by: Masahiro Yamada Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_h323_types.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/netfilter/nf_conntrack_h323_types.h b/include/linux/netfilter/nf_conntrack_h323_types.h index 7a6871ac8784..74c6f9241944 100644 --- a/include/linux/netfilter/nf_conntrack_h323_types.h +++ b/include/linux/netfilter/nf_conntrack_h323_types.h @@ -4,6 +4,9 @@ * Copyright (c) 2006 Jing Min Zhao */ +#ifndef _NF_CONNTRACK_H323_TYPES_H +#define _NF_CONNTRACK_H323_TYPES_H + typedef struct TransportAddress_ipAddress { /* SEQUENCE */ int options; /* No use */ unsigned int ip; @@ -931,3 +934,5 @@ typedef struct RasMessage { /* CHOICE */ InfoRequestResponse infoRequestResponse; }; } RasMessage; + +#endif /* _NF_CONNTRACK_H323_TYPES_H */ -- cgit v1.2.1 From b72fb1dcd2ea9d29417711cb302cef3006fa8d5a Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Wed, 7 Aug 2019 14:11:55 -0700 Subject: HID: wacom: Correct distance scale for 2nd-gen Intuos devices Distance values reported by 2nd-gen Intuos tablets are on an inverted scale (0 == far, 63 == near). We need to change them over to a normal scale before reporting to userspace or else userspace drivers and applications can get confused. 
Ref: https://github.com/linuxwacom/input-wacom/issues/98 Fixes: eda01dab53 ("HID: wacom: Add four new Intuos devices") Signed-off-by: Jason Gerecke Cc: # v4.4+ Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 50074485b88b..7a9e229e6253 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -846,6 +846,8 @@ static int wacom_intuos_general(struct wacom_wac *wacom) y >>= 1; distance >>= 1; } + if (features->type == INTUOSHT2) + distance = features->distance_max - distance; input_report_abs(input, ABS_X, x); input_report_abs(input, ABS_Y, y); input_report_abs(input, ABS_DISTANCE, distance); -- cgit v1.2.1 From b640be5bc8e4673dc8049cf74176ddedecea5597 Mon Sep 17 00:00:00 2001 From: Even Xu Date: Fri, 9 Aug 2019 21:18:29 +0800 Subject: HID: intel-ish-hid: ipc: add EHL device id EHL is a new platform using ishtp solution, add its device id to support list. Signed-off-by: Even Xu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ipc/hw-ish.h | 1 + drivers/hid/intel-ish-hid/ipc/pci-ish.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/intel-ish-hid/ipc/hw-ish.h b/drivers/hid/intel-ish-hid/ipc/hw-ish.h index 1065692f90e2..5792a104000a 100644 --- a/drivers/hid/intel-ish-hid/ipc/hw-ish.h +++ b/drivers/hid/intel-ish-hid/ipc/hw-ish.h @@ -24,6 +24,7 @@ #define ICL_MOBILE_DEVICE_ID 0x34FC #define SPT_H_DEVICE_ID 0xA135 #define CML_LP_DEVICE_ID 0x02FC +#define EHL_Ax_DEVICE_ID 0x4BB3 #define REVISION_ID_CHT_A0 0x6 #define REVISION_ID_CHT_Ax_SI 0x0 diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index aa80b4d3b740..279567baca3d 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -33,6 +33,7 @@ static const struct pci_device_id ish_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, ICL_MOBILE_DEVICE_ID)}, 
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, SPT_H_DEVICE_ID)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, CML_LP_DEVICE_ID)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, EHL_Ax_DEVICE_ID)}, {0, } }; MODULE_DEVICE_TABLE(pci, ish_pci_tbl); -- cgit v1.2.1 From 2d05dba2b25ecb0f8fc3a0b4eb2232da6454a47b Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 12 Aug 2019 18:04:44 +0200 Subject: HID: cp2112: prevent sleeping function called from invalid context When calling request_threaded_irq() with a CP2112, the function cp2112_gpio_irq_startup() is called in a IRQ context. Therefore we can not sleep, and we can not call cp2112_gpio_direction_input() there. Move the call to cp2112_gpio_direction_input() earlier to have a working driver. Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-cp2112.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 2310c96ccf4a..db1b55df0d13 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -1153,8 +1153,6 @@ static unsigned int cp2112_gpio_irq_startup(struct irq_data *d) INIT_DELAYED_WORK(&dev->gpio_poll_worker, cp2112_gpio_poll_callback); - cp2112_gpio_direction_input(gc, d->hwirq); - if (!dev->gpio_poll) { dev->gpio_poll = true; schedule_delayed_work(&dev->gpio_poll_worker, 0); @@ -1204,6 +1202,12 @@ static int __maybe_unused cp2112_allocate_irq(struct cp2112_device *dev, return PTR_ERR(dev->desc[pin]); } + ret = cp2112_gpio_direction_input(&dev->gc, pin); + if (ret < 0) { + dev_err(dev->gc.parent, "Failed to set GPIO to input dir\n"); + goto err_desc; + } + ret = gpiochip_lock_as_irq(&dev->gc, pin); if (ret) { dev_err(dev->gc.parent, "Failed to lock GPIO as interrupt\n"); -- cgit v1.2.1 From 7e10cc25bfa0dd3602bbcf5cc9c759a90eb675dc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Aug 2019 12:06:43 -0400 Subject: NFS: Don't refresh attributes with mounted-on-file information If we've been given the attributes of the 
mounted-on-file, then do not use those to check or update the attributes on the application-visible inode. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8a1758200b57..c764cfe456e5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1403,12 +1403,21 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) return 0; + /* No fileid? Just exit */ + if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) + return 0; /* Has the inode gone and changed behind our back? */ - if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) + if (nfsi->fileid != fattr->fileid) { + /* Is this perhaps the mounted-on fileid? */ + if ((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) && + nfsi->fileid == fattr->mounted_on_fileid) + return 0; return -ESTALE; + } if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) return -ESTALE; + if (!nfs_file_has_buffered_writers(nfsi)) { /* Verify a few of the more important attributes */ if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && !inode_eq_iversion_raw(inode, fattr->change_attr)) @@ -1768,18 +1777,6 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc); -static inline bool nfs_fileid_valid(struct nfs_inode *nfsi, - struct nfs_fattr *fattr) -{ - bool ret1 = true, ret2 = true; - - if (fattr->valid & NFS_ATTR_FATTR_FILEID) - ret1 = (nfsi->fileid == fattr->fileid); - if (fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) - ret2 = (nfsi->fileid == fattr->mounted_on_fileid); - return ret1 || ret2; -} - /* * Many nfs protocol calls return the new file attributes after * an operation. 
Here we update the inode to reflect the state @@ -1810,7 +1807,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_display_fhandle_hash(NFS_FH(inode)), atomic_read(&inode->i_count), fattr->valid); - if (!nfs_fileid_valid(nfsi, fattr)) { + /* No fileid? Just exit */ + if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) + return 0; + /* Has the inode gone and changed behind our back? */ + if (nfsi->fileid != fattr->fileid) { + /* Is this perhaps the mounted-on fileid? */ + if ((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) && + nfsi->fileid == fattr->mounted_on_fileid) + return 0; printk(KERN_ERR "NFS: server %s error: fileid changed\n" "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", NFS_SERVER(inode)->nfs_client->cl_hostname, -- cgit v1.2.1 From 90cf500e338ab3f3c0f126ba37e36fb6a9058441 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Aug 2019 15:03:11 -0400 Subject: NFSv4: Fix return values for nfs4_file_open() Currently, we are translating RPC level errors such as timeouts, as well as interrupts etc into EOPENSTALE, which forces a single replay of the open attempt. What we actually want to do is force the replay only in the cases where the returned error indicates that the file may have changed on the server. So the fix is to spell out the exact set of errors where we want to return EOPENSTALE. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4file.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 96db471ca2e5..339663d04bf8 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -73,13 +73,13 @@ nfs4_file_open(struct inode *inode, struct file *filp) if (IS_ERR(inode)) { err = PTR_ERR(inode); switch (err) { - case -EPERM: - case -EACCES: - case -EDQUOT: - case -ENOSPC: - case -EROFS: - goto out_put_ctx; default: + goto out_put_ctx; + case -ENOENT: + case -ESTALE: + case -EISDIR: + case -ENOTDIR: + case -ELOOP: goto out_drop; } } -- cgit v1.2.1 From 9821421a291b548ef4369c6998745baa36ddecd5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Aug 2019 12:15:07 -0400 Subject: NFSv4: Fix return value in nfs_finish_open() If the file turns out to be of the wrong type after opening, we want to revalidate the path and retry, so return EOPENSTALE rather than ESTALE. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8d501093660f..0adfd8840110 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1487,7 +1487,7 @@ static int nfs_finish_open(struct nfs_open_context *ctx, if (S_ISREG(file->f_path.dentry->d_inode->i_mode)) nfs_file_set_open_context(file, ctx); else - err = -ESTALE; + err = -EOPENSTALE; out: return err; } -- cgit v1.2.1 From f4340e9314dbfadc48758945f85fc3b16612d06f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Aug 2019 15:19:54 -0400 Subject: NFSv4/pnfs: Fix a page lock leak in nfs_pageio_resend() If the attempt to resend the pages fails, we need to ensure that we clean up those pages that were not transmitted. 
Fixes: d600ad1f2bdb ("NFS41: pop some layoutget errors to application") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.5+ --- fs/nfs/pagelist.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ed4e1b07447b..15c254753f88 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1251,20 +1251,22 @@ static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, int nfs_pageio_resend(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { - LIST_HEAD(failed); + LIST_HEAD(pages); desc->pg_io_completion = hdr->io_completion; desc->pg_dreq = hdr->dreq; - while (!list_empty(&hdr->pages)) { - struct nfs_page *req = nfs_list_entry(hdr->pages.next); + list_splice_init(&hdr->pages, &pages); + while (!list_empty(&pages)) { + struct nfs_page *req = nfs_list_entry(pages.next); if (!nfs_pageio_add_request(desc, req)) - nfs_list_move_request(req, &failed); + break; } nfs_pageio_complete(desc); - if (!list_empty(&failed)) { - list_move(&failed, &hdr->pages); - return desc->pg_error < 0 ? desc->pg_error : -EIO; + if (!list_empty(&pages)) { + int err = desc->pg_error < 0 ? desc->pg_error : -EIO; + hdr->completion_ops->error_cleanup(&pages, err); + return err; } return 0; } -- cgit v1.2.1 From eb2c50da9e256dbbb3ff27694440e4c1900cfef8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Aug 2019 18:04:36 -0400 Subject: NFS: Ensure O_DIRECT reports an error if the bytes read/written is 0 If the attempt to resend the I/O results in no bytes being read/written, we must ensure that we report the error. 
Signed-off-by: Trond Myklebust Fixes: 0a00b77b331a ("nfs: mirroring support for direct io") Cc: stable@vger.kernel.org # v3.20+ --- fs/nfs/direct.c | 27 ++++++++++++++++++--------- fs/nfs/pagelist.c | 1 + 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 0cb442406168..222d7115db71 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -401,15 +401,21 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) unsigned long bytes = 0; struct nfs_direct_req *dreq = hdr->dreq; - if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) - goto out_put; - spin_lock(&dreq->lock); - if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0)) + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) dreq->error = hdr->error; - else + + if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) { + spin_unlock(&dreq->lock); + goto out_put; + } + + if (hdr->good_bytes != 0) nfs_direct_good_bytes(dreq, hdr); + if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) + dreq->error = 0; + spin_unlock(&dreq->lock); while (!list_empty(&hdr->pages)) { @@ -782,16 +788,19 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) bool request_commit = false; struct nfs_page *req = nfs_list_entry(hdr->pages.next); - if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) - goto out_put; - nfs_init_cinfo_from_dreq(&cinfo, dreq); spin_lock(&dreq->lock); if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) dreq->error = hdr->error; - if (dreq->error == 0) { + + if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) { + spin_unlock(&dreq->lock); + goto out_put; + } + + if (hdr->good_bytes != 0) { nfs_direct_good_bytes(dreq, hdr); if (nfs_write_need_commit(hdr)) { if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 15c254753f88..56cefa0ab804 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1266,6 +1266,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc, if (!list_empty(&pages)) { int err = desc->pg_error < 0 ? 
desc->pg_error : -EIO; hdr->completion_ops->error_cleanup(&pages, err); + nfs_set_pgio_error(hdr, err, hdr->io_start); return err; } return 0; -- cgit v1.2.1 From 17d8c5d145000070c581f2a8aa01edc7998582ab Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 14 Aug 2019 14:19:09 -0400 Subject: NFS: Fix initialisation of I/O result struct in nfs_pgio_rpcsetup Initialise the result count to 0 rather than initialising it to the argument count. The reason is that we want to ensure we record the I/O stats correctly in the case where an error is returned (for instance in the layoutstats). Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 56cefa0ab804..20b3717cd7ca 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -590,7 +590,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr, } hdr->res.fattr = &hdr->fattr; - hdr->res.count = count; + hdr->res.count = 0; hdr->res.eof = 0; hdr->res.verf = &hdr->verf; nfs_fattr_init(&hdr->fattr); -- cgit v1.2.1 From 06c9fdf3b9f1acc6e53753c99c54c39764cc979f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 14 Aug 2019 15:42:43 -0400 Subject: NFS: On fatal writeback errors, we need to call nfs_inode_remove_request() If the writeback error is fatal, we need to remove the tracking structures (i.e. the nfs_page) from the inode. 
Fixes: 6fbda89b257f ("NFS: Replace custom error reporting mechanism...") Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 92d9cadc6102..3399149435ce 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -57,6 +57,7 @@ static const struct rpc_call_ops nfs_commit_ops; static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; static const struct nfs_commit_completion_ops nfs_commit_completion_ops; static const struct nfs_rw_ops nfs_rw_write_ops; +static void nfs_inode_remove_request(struct nfs_page *req); static void nfs_clear_request_commit(struct nfs_page *req); static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, struct inode *inode); @@ -591,7 +592,9 @@ release_request: static void nfs_write_error(struct nfs_page *req, int error) { + nfs_set_pageerror(page_file_mapping(req->wb_page)); nfs_mapping_set_error(req->wb_page, error); + nfs_inode_remove_request(req); nfs_end_page_writeback(req); nfs_release_request(req); } -- cgit v1.2.1 From 0a46fff2f9108c2c44218380a43a736cf4612541 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 13 Aug 2019 16:16:54 +0300 Subject: x86/boot/compressed/64: Fix boot on machines with broken E820 table BIOS on Samsung 500C Chromebook reports very rudimentary E820 table that consists of 2 entries: BIOS-e820: [mem 0x0000000000000000-0x0000000000000fff] usable BIOS-e820: [mem 0x00000000fffff000-0x00000000ffffffff] reserved It breaks logic in find_trampoline_placement(): bios_start lands on the end of the first 4k page and trampoline start gets placed below 0. Detect underflow and don't touch bios_start for such cases. It makes kernel ignore E820 table on machines that doesn't have two usable pages below BIOS_START_MAX. Fixes: 1b3a62643660 ("x86/boot/compressed/64: Validate trampoline placement against E820") Signed-off-by: Kirill A. Shutemov Signed-off-by: Borislav Petkov Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Cc: x86-ml Link: https://bugzilla.kernel.org/show_bug.cgi?id=203463 Link: https://lkml.kernel.org/r/20190813131654.24378-1-kirill.shutemov@linux.intel.com --- arch/x86/boot/compressed/pgtable_64.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 5f2d03067ae5..2faddeb0398a 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -72,6 +72,8 @@ static unsigned long find_trampoline_placement(void) /* Find the first usable memory region under bios_start. */ for (i = boot_params->e820_entries - 1; i >= 0; i--) { + unsigned long new; + entry = &boot_params->e820_table[i]; /* Skip all entries above bios_start. */ @@ -84,15 +86,20 @@ static unsigned long find_trampoline_placement(void) /* Adjust bios_start to the end of the entry if needed. */ if (bios_start > entry->addr + entry->size) - bios_start = entry->addr + entry->size; + new = entry->addr + entry->size; /* Keep bios_start page-aligned. */ - bios_start = round_down(bios_start, PAGE_SIZE); + new = round_down(new, PAGE_SIZE); /* Skip the entry if it's too small. */ - if (bios_start - TRAMPOLINE_32BIT_SIZE < entry->addr) + if (new - TRAMPOLINE_32BIT_SIZE < entry->addr) continue; + /* Protect against underflow. */ + if (new - TRAMPOLINE_32BIT_SIZE > bios_start) + break; + + bios_start = new; break; } -- cgit v1.2.1 From 555df336c754ac9de1af9a5c72508918b3796b18 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 Aug 2019 16:02:01 +0100 Subject: keys: Fix description size The maximum key description size is 4095. 
Commit f771fde82051 ("keys: Simplify key description management") inadvertantly reduced that to 255 and made sizes between 256 and 4095 work weirdly, and any size whereby size & 255 == 0 would cause an assertion in __key_link_begin() at the following line: BUG_ON(index_key->desc_len == 0); This can be fixed by simply increasing the size of desc_len in struct keyring_index_key to a u16. Note the argument length test in keyutils only checked empty descriptions and descriptions with a size around the limit (ie. 4095) and not for all the values in between, so it missed this. This has been addressed and https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/keyutils.git/commit/?id=066bf56807c26cd3045a25f355b34c1d8a20a5aa now exhaustively tests all possible lengths of type, description and payload and then some. The assertion failure looks something like: kernel BUG at security/keys/keyring.c:1245! ... RIP: 0010:__key_link_begin+0x88/0xa0 ... Call Trace: key_create_or_update+0x211/0x4b0 __x64_sys_add_key+0x101/0x200 do_syscall_64+0x5b/0x1e0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 It can be triggered by: keyctl add user "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" a @s Fixes: f771fde82051 ("keys: Simplify key description management") Reported-by: kernel test robot Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/linux/key.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/key.h b/include/linux/key.h index 91f391cd272e..50028338a4cc 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -94,11 +94,11 @@ struct keyring_index_key { union { struct { #ifdef __LITTLE_ENDIAN /* Put desc_len at the LSB of x */ - u8 desc_len; - char desc[sizeof(long) - 1]; /* First few chars of description */ + u16 
desc_len; + char desc[sizeof(long) - 2]; /* First few chars of description */ #else - char desc[sizeof(long) - 1]; /* First few chars of description */ - u8 desc_len; + char desc[sizeof(long) - 2]; /* First few chars of description */ + u16 desc_len; #endif }; unsigned long x; -- cgit v1.2.1 From f9ef724d4896763479f3921afd1ee61552fc9836 Mon Sep 17 00:00:00 2001 From: Jeronimo Borque Date: Sun, 18 Aug 2019 22:35:38 -0300 Subject: ALSA: hda - Fixes inverted Conexant GPIO mic mute led "enabled" parameter historically referred to the device input or output, not to the led indicator. After the changes added with the led helper functions the mic mute led logic refers to the led and not to the mic input which caused led indicator to be negated. Fixing logic in cxt_update_gpio_led and updated cxt_fixup_gpio_mute_hook Also updated debug messages to ease further debugging if necessary. Fixes: 184e302b46c9 ("ALSA: hda/conexant - Use the mic-mute LED helper") Suggested-by: Takashi Iwai Signed-off-by: Jeronimo Borque Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 14298ef45b21..968d3caab6ac 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -611,18 +611,20 @@ static void cxt_fixup_hp_gate_mic_jack(struct hda_codec *codec, /* update LED status via GPIO */ static void cxt_update_gpio_led(struct hda_codec *codec, unsigned int mask, - bool enabled) + bool led_on) { struct conexant_spec *spec = codec->spec; unsigned int oldval = spec->gpio_led; if (spec->mute_led_polarity) - enabled = !enabled; + led_on = !led_on; - if (enabled) - spec->gpio_led &= ~mask; - else + if (led_on) spec->gpio_led |= mask; + else + spec->gpio_led &= ~mask; + codec_dbg(codec, "mask:%d enabled:%d gpio_led:%d\n", + mask, led_on, spec->gpio_led); if (spec->gpio_led != oldval) snd_hda_codec_write(codec, 0x01, 
0, AC_VERB_SET_GPIO_DATA, spec->gpio_led); @@ -633,8 +635,8 @@ static void cxt_fixup_gpio_mute_hook(void *private_data, int enabled) { struct hda_codec *codec = private_data; struct conexant_spec *spec = codec->spec; - - cxt_update_gpio_led(codec, spec->gpio_mute_led_mask, enabled); + /* muted -> LED on */ + cxt_update_gpio_led(codec, spec->gpio_mute_led_mask, !enabled); } /* turn on/off mic-mute LED via GPIO per capture hook */ @@ -656,7 +658,6 @@ static void cxt_fixup_mute_led_gpio(struct hda_codec *codec, { 0x01, AC_VERB_SET_GPIO_DIRECTION, 0x03 }, {} }; - codec_info(codec, "action: %d gpio_led: %d\n", action, spec->gpio_led); if (action == HDA_FIXUP_ACT_PRE_PROBE) { spec->gen.vmaster_mute.hook = cxt_fixup_gpio_mute_hook; -- cgit v1.2.1 From c49a0a80137c7ca7d6ced4c812c9e07a949f6f24 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 19 Aug 2019 15:52:35 +0000 Subject: x86/CPU/AMD: Clear RDRAND CPUID bit on AMD family 15h/16h There have been reports of RDRAND issues after resuming from suspend on some AMD family 15h and family 16h systems. This issue stems from a BIOS not performing the proper steps during resume to ensure RDRAND continues to function properly. RDRAND support is indicated by CPUID Fn00000001_ECX[30]. This bit can be reset by clearing MSR C001_1004[62]. Any software that checks for RDRAND support using CPUID, including the kernel, will believe that RDRAND is not supported. Update the CPU initialization to clear the RDRAND CPUID bit for any family 15h and 16h processor that supports RDRAND. If it is known that the family 15h or family 16h system does not have an RDRAND resume issue or that the system will not be placed in suspend, the "rdrand=force" kernel parameter can be used to stop the clearing of the RDRAND CPUID bit. Additionally, update the suspend and resume path to save and restore the MSR C001_1004 value to ensure that the RDRAND CPUID setting remains in place after resuming from suspend. 
Note, that clearing the RDRAND CPUID bit does not prevent a processor that normally supports the RDRAND instruction from executing it. So any code that determined the support based on family and model won't #UD. Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov Cc: Andrew Cooper Cc: Andrew Morton Cc: Chen Yu Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: "linux-doc@vger.kernel.org" Cc: "linux-pm@vger.kernel.org" Cc: Nathan Chancellor Cc: Paolo Bonzini Cc: Pavel Machek Cc: "Rafael J. Wysocki" Cc: Cc: Thomas Gleixner Cc: "x86@kernel.org" Link: https://lkml.kernel.org/r/7543af91666f491547bd86cebb1e17c66824ab9f.1566229943.git.thomas.lendacky@amd.com --- Documentation/admin-guide/kernel-parameters.txt | 7 ++ arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/amd.c | 66 +++++++++++++++++++ arch/x86/power/cpu.c | 86 +++++++++++++++++++++---- 4 files changed, 147 insertions(+), 13 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 47d981a86e2f..4c1971960afa 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4090,6 +4090,13 @@ Run specified binary instead of /init from the ramdisk, used for early userspace startup. See initrd. + rdrand= [X86] + force - Override the decision by the kernel to hide the + advertisement of RDRAND support (this affects + certain AMD processors because of buggy BIOS + support, specifically around the suspend/resume + path). + rdt= [HW,X86,RDT] Turn on/off individual RDT features. 
List is: cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp, diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 6b4fc2788078..271d837d69a8 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -381,6 +381,7 @@ #define MSR_AMD64_PATCH_LEVEL 0x0000008b #define MSR_AMD64_TSC_RATIO 0xc0000104 #define MSR_AMD64_NB_CFG 0xc001001f +#define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_PATCH_LOADER 0xc0010020 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8d4e50428b68..68c363c341bf 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -804,6 +804,64 @@ static void init_amd_ln(struct cpuinfo_x86 *c) msr_set_bit(MSR_AMD64_DE_CFG, 31); } +static bool rdrand_force; + +static int __init rdrand_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "force")) + rdrand_force = true; + else + return -EINVAL; + + return 0; +} +early_param("rdrand", rdrand_cmdline); + +static void clear_rdrand_cpuid_bit(struct cpuinfo_x86 *c) +{ + /* + * Saving of the MSR used to hide the RDRAND support during + * suspend/resume is done by arch/x86/power/cpu.c, which is + * dependent on CONFIG_PM_SLEEP. + */ + if (!IS_ENABLED(CONFIG_PM_SLEEP)) + return; + + /* + * The nordrand option can clear X86_FEATURE_RDRAND, so check for + * RDRAND support using the CPUID function directly. + */ + if (!(cpuid_ecx(1) & BIT(30)) || rdrand_force) + return; + + msr_clear_bit(MSR_AMD64_CPUID_FN_1, 62); + + /* + * Verify that the CPUID change has occurred in case the kernel is + * running virtualized and the hypervisor doesn't support the MSR. 
+ */ + if (cpuid_ecx(1) & BIT(30)) { + pr_info_once("BIOS may not properly restore RDRAND after suspend, but hypervisor does not support hiding RDRAND via CPUID.\n"); + return; + } + + clear_cpu_cap(c, X86_FEATURE_RDRAND); + pr_info_once("BIOS may not properly restore RDRAND after suspend, hiding RDRAND via CPUID. Use rdrand=force to reenable.\n"); +} + +static void init_amd_jg(struct cpuinfo_x86 *c) +{ + /* + * Some BIOS implementations do not restore proper RDRAND support + * across suspend and resume. Check on whether to hide the RDRAND + * instruction support via CPUID. + */ + clear_rdrand_cpuid_bit(c); +} + static void init_amd_bd(struct cpuinfo_x86 *c) { u64 value; @@ -818,6 +876,13 @@ static void init_amd_bd(struct cpuinfo_x86 *c) wrmsrl_safe(MSR_F15H_IC_CFG, value); } } + + /* + * Some BIOS implementations do not restore proper RDRAND support + * across suspend and resume. Check on whether to hide the RDRAND + * instruction support via CPUID. + */ + clear_rdrand_cpuid_bit(c); } static void init_amd_zn(struct cpuinfo_x86 *c) @@ -860,6 +925,7 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x10: init_amd_gh(c); break; case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; + case 0x16: init_amd_jg(c); break; case 0x17: init_amd_zn(c); break; } diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 24b079e94bc2..c9ef6a7a4a1a 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -23,7 +24,7 @@ #include #include #include -#include +#include #ifdef CONFIG_X86_32 __visible unsigned long saved_context_ebx; @@ -397,15 +398,14 @@ static int __init bsp_pm_check_init(void) core_initcall(bsp_pm_check_init); -static int msr_init_context(const u32 *msr_id, const int total_num) +static int msr_build_context(const u32 *msr_id, const int num) { - int i = 0; + struct saved_msrs *saved_msrs = &saved_context.saved_msrs; struct saved_msr *msr_array; + int total_num; + int 
i, j; - if (saved_context.saved_msrs.array || saved_context.saved_msrs.num > 0) { - pr_err("x86/pm: MSR quirk already applied, please check your DMI match table.\n"); - return -EINVAL; - } + total_num = saved_msrs->num + num; msr_array = kmalloc_array(total_num, sizeof(struct saved_msr), GFP_KERNEL); if (!msr_array) { @@ -413,19 +413,30 @@ static int msr_init_context(const u32 *msr_id, const int total_num) return -ENOMEM; } - for (i = 0; i < total_num; i++) { - msr_array[i].info.msr_no = msr_id[i]; + if (saved_msrs->array) { + /* + * Multiple callbacks can invoke this function, so copy any + * MSR save requests from previous invocations. + */ + memcpy(msr_array, saved_msrs->array, + sizeof(struct saved_msr) * saved_msrs->num); + + kfree(saved_msrs->array); + } + + for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) { + msr_array[i].info.msr_no = msr_id[j]; msr_array[i].valid = false; msr_array[i].info.reg.q = 0; } - saved_context.saved_msrs.num = total_num; - saved_context.saved_msrs.array = msr_array; + saved_msrs->num = total_num; + saved_msrs->array = msr_array; return 0; } /* - * The following section is a quirk framework for problematic BIOSen: + * The following sections are a quirk framework for problematic BIOSen: * Sometimes MSRs are modified by the BIOSen after suspended to * RAM, this might cause unexpected behavior after wakeup. 
* Thus we save/restore these specified MSRs across suspend/resume @@ -440,7 +451,7 @@ static int msr_initialize_bdw(const struct dmi_system_id *d) u32 bdw_msr_id[] = { MSR_IA32_THERM_CONTROL }; pr_info("x86/pm: %s detected, MSR saving is needed during suspending.\n", d->ident); - return msr_init_context(bdw_msr_id, ARRAY_SIZE(bdw_msr_id)); + return msr_build_context(bdw_msr_id, ARRAY_SIZE(bdw_msr_id)); } static const struct dmi_system_id msr_save_dmi_table[] = { @@ -455,9 +466,58 @@ static const struct dmi_system_id msr_save_dmi_table[] = { {} }; +static int msr_save_cpuid_features(const struct x86_cpu_id *c) +{ + u32 cpuid_msr_id[] = { + MSR_AMD64_CPUID_FN_1, + }; + + pr_info("x86/pm: family %#hx cpu detected, MSR saving is needed during suspending.\n", + c->family); + + return msr_build_context(cpuid_msr_id, ARRAY_SIZE(cpuid_msr_id)); +} + +static const struct x86_cpu_id msr_save_cpu_table[] = { + { + .vendor = X86_VENDOR_AMD, + .family = 0x15, + .model = X86_MODEL_ANY, + .feature = X86_FEATURE_ANY, + .driver_data = (kernel_ulong_t)msr_save_cpuid_features, + }, + { + .vendor = X86_VENDOR_AMD, + .family = 0x16, + .model = X86_MODEL_ANY, + .feature = X86_FEATURE_ANY, + .driver_data = (kernel_ulong_t)msr_save_cpuid_features, + }, + {} +}; + +typedef int (*pm_cpu_match_t)(const struct x86_cpu_id *); +static int pm_cpu_check(const struct x86_cpu_id *c) +{ + const struct x86_cpu_id *m; + int ret = 0; + + m = x86_match_cpu(msr_save_cpu_table); + if (m) { + pm_cpu_match_t fn; + + fn = (pm_cpu_match_t)m->driver_data; + ret = fn(m); + } + + return ret; +} + static int pm_check_save_msr(void) { dmi_check_system(msr_save_dmi_table); + pm_cpu_check(msr_save_cpu_table); + return 0; } -- cgit v1.2.1 From d0ff14fdc987303aeeb7de6f1bd72c3749ae2a9b Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Thu, 1 Aug 2019 23:53:53 +0000 Subject: genirq: Properly pair kobject_del() with kobject_add() If alloc_descs() fails before irq_sysfs_init() has run, free_desc() in the cleanup path 
will call kobject_del() even though the kobject has not been added with kobject_add(). Fix this by making the call to kobject_del() conditional on whether irq_sysfs_init() has run. This problem surfaced because commit aa30f47cf666 ("kobject: Add support for default attribute groups to kobj_type") makes kobject_del() stricter about pairing with kobject_add(). If the pairing is incorrect, a WARNING and backtrace occur in sysfs_remove_group() because there is no parent. [ tglx: Add a comment to the code and make it work with CONFIG_SYSFS=n ] Fixes: ecb3f394c5db ("genirq: Expose interrupt information through sysfs") Signed-off-by: Michael Kelley Signed-off-by: Thomas Gleixner Acked-by: Greg Kroah-Hartman Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1564703564-4116-1-git-send-email-mikelley@microsoft.com --- kernel/irq/irqdesc.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 9484e88dabc2..9be995fc3c5a 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -295,6 +295,18 @@ static void irq_sysfs_add(int irq, struct irq_desc *desc) } } +static void irq_sysfs_del(struct irq_desc *desc) +{ + /* + * If irq_sysfs_init() has not yet been invoked (early boot), then + * irq_kobj_base is NULL and the descriptor was never added. + * kobject_del() complains about a object with no parent, so make + * it conditional. + */ + if (irq_kobj_base) + kobject_del(&desc->kobj); +} + static int __init irq_sysfs_init(void) { struct irq_desc *desc; @@ -325,6 +337,7 @@ static struct kobj_type irq_kobj_type = { }; static void irq_sysfs_add(int irq, struct irq_desc *desc) {} +static void irq_sysfs_del(struct irq_desc *desc) {} #endif /* CONFIG_SYSFS */ @@ -438,7 +451,7 @@ static void free_desc(unsigned int irq) * The sysfs entry must be serialized against a concurrent * irq_sysfs_init() as well.
*/ - kobject_del(&desc->kobj); + irq_sysfs_del(desc); delete_irq_desc(irq); /* -- cgit v1.2.1 From ef8d8ccdc216f797e66cb4a1372f5c4c285ce1e4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 16 Aug 2019 21:26:22 -0700 Subject: tcp: make sure EPOLLOUT wont be missed As Jason Baron explained in commit 790ba4566c1a ("tcp: set SOCK_NOSPACE under memory pressure"), it is crucial we properly set SOCK_NOSPACE when needed. However, Jason patch had a bug, because the 'nonblocking' status as far as sk_stream_wait_memory() is concerned is governed by MSG_DONTWAIT flag passed at sendmsg() time : long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); So it is very possible that tcp sendmsg() calls sk_stream_wait_memory(), and that sk_stream_wait_memory() returns -EAGAIN with SOCK_NOSPACE cleared, if sk->sk_sndtimeo has been set to a small (but not zero) value. This patch removes the 'noblock' variable since we must always set SOCK_NOSPACE if -EAGAIN is returned. It also renames the do_nonblock label since we might reach this code path even if we were in blocking mode. Fixes: 790ba4566c1a ("tcp: set SOCK_NOSPACE under memory pressure") Signed-off-by: Eric Dumazet Cc: Jason Baron Reported-by: Vladimir Rutsky Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Acked-by: Jason Baron Signed-off-by: David S. Miller --- net/core/stream.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/net/core/stream.c b/net/core/stream.c index e94bb02a5629..4f1d4aa5fb38 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -120,7 +120,6 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) int err = 0; long vm_wait = 0; long current_timeo = *timeo_p; - bool noblock = (*timeo_p ? 
false : true); DEFINE_WAIT_FUNC(wait, woken_wake_function); if (sk_stream_memory_free(sk)) @@ -133,11 +132,8 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto do_error; - if (!*timeo_p) { - if (noblock) - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - goto do_nonblock; - } + if (!*timeo_p) + goto do_eagain; if (signal_pending(current)) goto do_interrupted; sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); @@ -169,7 +165,13 @@ out: do_error: err = -EPIPE; goto out; -do_nonblock: +do_eagain: + /* Make sure that whenever EAGAIN is returned, EPOLLOUT event can + * be generated later. + * When TCP receives ACK packets that make room, tcp_check_space() + * only calls tcp_new_space() if SOCK_NOSPACE is set. + */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); err = -EAGAIN; goto out; do_interrupted: -- cgit v1.2.1 From 3a7ef457e85173a5b9ec7a03016db5a57b717b33 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Tue, 13 Aug 2019 00:46:01 +0200 Subject: ipv6: Fix return value of ipv6_mc_may_pull() for malformed packets Commit ba5ea614622d ("bridge: simplify ip_mc_check_igmp() and ipv6_mc_check_mld() calls") replaces direct calls to pskb_may_pull() in br_ipv6_multicast_mld2_report() with calls to ipv6_mc_may_pull(), that returns -EINVAL on buffers too short to be valid IPv6 packets, while maintaining the previous handling of the return code. This leads to the direct opposite of the intended effect: if the packet is malformed, -EINVAL evaluates as true, and we'll happily proceed with the processing. Return 0 if the packet is too short, in the same way as this was fixed for IPv4 by commit 083b78a9ed64 ("ip: fix ip_mc_may_pull() return value"). I don't have a reproducer for this, unlike the one referred to by the IPv4 commit, but this is clearly broken. Fixes: ba5ea614622d ("bridge: simplify ip_mc_check_igmp() and ipv6_mc_check_mld() calls") Signed-off-by: Stefano Brivio Acked-by: Guillaume Nault Signed-off-by: David S. 
Miller --- include/net/addrconf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index becdad576859..3f62b347b04a 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -206,7 +206,7 @@ static inline int ipv6_mc_may_pull(struct sk_buff *skb, unsigned int len) { if (skb_transport_offset(skb) + ipv6_transport_len(skb) < len) - return -EINVAL; + return 0; return pskb_may_pull(skb, len); } -- cgit v1.2.1 From 2f102274e8129c9c0bb3a2bde0f641531aefea8b Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Tue, 13 Aug 2019 09:05:30 +0300 Subject: MAINTAINERS: net_failover: Fix typo in a filepath Replace "driver" with "drivers" in the filepath to net_failover.c Cc: Sridhar Samudrala Cc: David S. Miller Cc: netdev@vger.kernel.org Fixes: cfc80d9a1163 ("net: Introduce net_failover driver") Signed-off-by: Denis Efremov Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 22b8273069af..a744851db1df 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11080,7 +11080,7 @@ NET_FAILOVER MODULE M: Sridhar Samudrala L: netdev@vger.kernel.org S: Supported -F: driver/net/net_failover.c +F: drivers/net/net_failover.c F: include/net/net_failover.h F: Documentation/networking/net_failover.rst -- cgit v1.2.1 From cd9d4ff9b78fcd0fc4708900ba3e52e71e1a7690 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 19 Aug 2019 07:04:25 +0200 Subject: Kconfig: Fix the reference to the IDT77105 Phy driver in the description of ATM_NICSTAR_USE_IDT77105 This should be IDT77105, not IDT77015. Signed-off-by: Christophe JAILLET Signed-off-by: David S. 
Miller --- drivers/atm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/Kconfig b/drivers/atm/Kconfig index 2e2efa577437..8c37294f1d1e 100644 --- a/drivers/atm/Kconfig +++ b/drivers/atm/Kconfig @@ -200,7 +200,7 @@ config ATM_NICSTAR_USE_SUNI make the card work). config ATM_NICSTAR_USE_IDT77105 - bool "Use IDT77015 PHY driver (25Mbps)" + bool "Use IDT77105 PHY driver (25Mbps)" depends on ATM_NICSTAR help Support for the PHYsical layer chip in ForeRunner LE25 cards. In -- cgit v1.2.1 From b68271609c4f16a79eae8069933f64345afcf888 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 19 Aug 2019 18:15:28 -0700 Subject: fs/xfs: Fix return code of xfs_break_leased_layouts() The parens used in the while loop would result in error being assigned the value 1 rather than the intended errno value. This is required to return -ETXTBSY from follow on break_layout() changes. Signed-off-by: Ira Weiny Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 0c954cad7449..a339bd5fa260 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -32,7 +32,7 @@ xfs_break_leased_layouts( struct xfs_inode *ip = XFS_I(inode); int error; - while ((error = break_layout(inode, false) == -EWOULDBLOCK)) { + while ((error = break_layout(inode, false)) == -EWOULDBLOCK) { xfs_iunlock(ip, *iolock); *did_unlock = true; error = break_layout(inode, true); -- cgit v1.2.1 From 1edfb8ed6cc12107c2ec61b5be7bc881cfc4460e Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 19 Aug 2019 10:33:04 +0300 Subject: nfp: flower: verify that block cb is not busy before binding When processing FLOW_BLOCK_BIND command on indirect block, check that flow block cb is not busy. 
Fixes: 0d4fd02e7199 ("net: flow_offload: add flow_block_cb_is_busy() and use it") Reported-by: Jakub Kicinski Signed-off-by: Vlad Buslov Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/offload.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index e209f150c5f2..9917d64694c6 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -1416,6 +1416,13 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app, switch (f->command) { case FLOW_BLOCK_BIND: + cb_priv = nfp_flower_indr_block_cb_priv_lookup(app, netdev); + if (cb_priv && + flow_block_cb_is_busy(nfp_flower_setup_indr_block_cb, + cb_priv, + &nfp_block_cb_list)) + return -EBUSY; + cb_priv = kmalloc(sizeof(*cb_priv), GFP_KERNEL); if (!cb_priv) return -ENOMEM; -- cgit v1.2.1 From 77ffd3465ba837e9dc714e17b014e77b2eae765a Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 15 Aug 2019 19:36:49 -0700 Subject: scsi: lpfc: Mitigate high memory pre-allocation by SCSI-MQ When SCSI-MQ is enabled, the SCSI-MQ layers will do pre-allocation of MQ resources based on shost values set by the driver. In newer cases of the driver, which attempts to set nr_hw_queues to the cpu count, the multipliers become excessive, with a single shost having SCSI-MQ pre-allocation reaching into the multiple GBytes range. NPIV, which creates additional shosts, only multiply this overhead. On lower-memory systems, this can exhaust system memory very quickly, resulting in a system crash or failures in the driver or elsewhere due to low memory conditions. After testing several scenarios, the situation can be mitigated by limiting the value set in shost->nr_hw_queues to 4. 
Although the shost values were changed, the driver still had per-cpu hardware queues of its own that allowed parallelization per-cpu. Testing revealed that even with the smallish number for nr_hw_queues for SCSI-MQ, performance levels remained near maximum with the within-driver affinitization. A module parameter was created to allow the value set for the nr_hw_queues to be tunable. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Ming Lei Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 1 + drivers/scsi/lpfc/lpfc_attr.c | 15 +++++++++++++++ drivers/scsi/lpfc/lpfc_init.c | 10 ++++++---- drivers/scsi/lpfc/lpfc_sli4.h | 5 +++++ 4 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 2c3bb8a966e5..bade2e025ecf 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -824,6 +824,7 @@ struct lpfc_hba { uint32_t cfg_cq_poll_threshold; uint32_t cfg_cq_max_proc_limit; uint32_t cfg_fcp_cpu_map; + uint32_t cfg_fcp_mq_threshold; uint32_t cfg_hdw_queue; uint32_t cfg_irq_chann; uint32_t cfg_suppress_rsp; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index ea62322ffe2b..8d8c495b5b60 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -5708,6 +5708,19 @@ LPFC_ATTR_RW(nvme_oas, 0, 0, 1, LPFC_ATTR_RW(nvme_embed_cmd, 1, 0, 2, "Embed NVME Command in WQE"); +/* + * lpfc_fcp_mq_threshold: Set the maximum number of Hardware Queues + * the driver will advertise it supports to the SCSI layer. + * + * 0 = Set nr_hw_queues by the number of CPUs or HW queues. + * 1,128 = Manually specify the maximum nr_hw_queue value to be set, + * + * Value range is [0,128]. Default value is 8.
+ */ +LPFC_ATTR_R(fcp_mq_threshold, LPFC_FCP_MQ_THRESHOLD_DEF, + LPFC_FCP_MQ_THRESHOLD_MIN, LPFC_FCP_MQ_THRESHOLD_MAX, + "Set the number of SCSI Queues advertised"); + /* * lpfc_hdw_queue: Set the number of Hardware Queues the driver * will advertise it supports to the NVME and SCSI layers. This also @@ -6030,6 +6043,7 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_lpfc_cq_poll_threshold, &dev_attr_lpfc_cq_max_proc_limit, &dev_attr_lpfc_fcp_cpu_map, + &dev_attr_lpfc_fcp_mq_threshold, &dev_attr_lpfc_hdw_queue, &dev_attr_lpfc_irq_chann, &dev_attr_lpfc_suppress_rsp, @@ -7112,6 +7126,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) /* Initialize first burst. Target vs Initiator are different. */ lpfc_nvme_enable_fb_init(phba, lpfc_nvme_enable_fb); lpfc_nvmet_fb_size_init(phba, lpfc_nvmet_fb_size); + lpfc_fcp_mq_threshold_init(phba, lpfc_fcp_mq_threshold); lpfc_hdw_queue_init(phba, lpfc_hdw_queue); lpfc_irq_chann_init(phba, lpfc_irq_chann); lpfc_enable_bbcr_init(phba, lpfc_enable_bbcr); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index a7549ae32542..1ac98becb5ba 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -4309,10 +4309,12 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) shost->max_cmd_len = 16; if (phba->sli_rev == LPFC_SLI_REV4) { - if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) - shost->nr_hw_queues = phba->cfg_hdw_queue; - else - shost->nr_hw_queues = phba->sli4_hba.num_present_cpu; + if (!phba->cfg_fcp_mq_threshold || + phba->cfg_fcp_mq_threshold > phba->cfg_hdw_queue) + phba->cfg_fcp_mq_threshold = phba->cfg_hdw_queue; + + shost->nr_hw_queues = min_t(int, 2 * num_possible_nodes(), + phba->cfg_fcp_mq_threshold); shost->dma_boundary = phba->sli4_hba.pc_sli4_params.sge_supp_len-1; diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 3aeca387b22a..329f7aa7e169 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ 
b/drivers/scsi/lpfc/lpfc_sli4.h @@ -44,6 +44,11 @@ #define LPFC_HBA_HDWQ_MAX 128 #define LPFC_HBA_HDWQ_DEF 0 +/* FCP MQ queue count limiting */ +#define LPFC_FCP_MQ_THRESHOLD_MIN 0 +#define LPFC_FCP_MQ_THRESHOLD_MAX 128 +#define LPFC_FCP_MQ_THRESHOLD_DEF 8 + /* Common buffer size to accomidate SCSI and NVME IO buffers */ #define LPFC_COMMON_IO_BUF_SZ 768 -- cgit v1.2.1 From 936376f88ff1845b384b3a82b9cd167e53039229 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Aug 2019 10:08:38 +0900 Subject: arm: select the dma-noncoherent symbols for all swiotlb builds We need to provide the arch hooks for non-coherent dma-direct and swiotlb for all swiotlb builds, not just when LPAE is enabled. Without that the Xen build that selects SWIOTLB indirectly through SWIOTLB_XEN fails to build. Fixes: ad3c7b18c5b3 ("arm: use swiotlb for bounce buffering on LPAE configs") Reported-by: Stefan Wahren Signed-off-by: Christoph Hellwig Tested-by: Stefan Wahren --- arch/arm/Kconfig | 4 ++++ arch/arm/mm/Kconfig | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 33b00579beff..24360211534a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -7,6 +7,8 @@ config ARM select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_DMA_COHERENT_TO_PFN if SWIOTLB + select ARCH_HAS_DMA_MMAP_PGPROT if SWIOTLB select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_KEEPINITRD @@ -18,6 +20,8 @@ config ARM select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL select ARCH_HAS_STRICT_MODULE_RWX if MMU + select ARCH_HAS_SYNC_DMA_FOR_DEVICE if SWIOTLB + select ARCH_HAS_SYNC_DMA_FOR_CPU if SWIOTLB select ARCH_HAS_TEARDOWN_DMA_OPS if MMU select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index c54cd7ed90ba..c1222c0e9fd3 100644 ---
a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -664,10 +664,6 @@ config ARM_LPAE !CPU_32v4 && !CPU_32v3 select PHYS_ADDR_T_64BIT select SWIOTLB - select ARCH_HAS_DMA_COHERENT_TO_PFN - select ARCH_HAS_DMA_MMAP_PGPROT - select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select ARCH_HAS_SYNC_DMA_FOR_CPU help Say Y if you have an ARMv7 processor supporting the LPAE page table format and you would like to access memory beyond the -- cgit v1.2.1 From 1a15718b41df026cffd0e42cfdc38a1384ce19f9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 20 Aug 2019 08:58:12 +0200 Subject: ALSA: usb-audio: Add implicit fb quirk for Behringer UFX1604 Behringer UFX1604 requires the similar quirk to apply implicit fb like another Behringer model UFX1204 in order to fix the noisy playback. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=204631 Cc: Signed-off-by: Takashi Iwai --- sound/usb/pcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 75b96929f76c..e4bbf79de956 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -339,6 +339,7 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, ep = 0x81; ifnum = 2; goto add_sync_ep_from_ifnum; + case USB_ID(0x1397, 0x0001): /* Behringer UFX1604 */ case USB_ID(0x1397, 0x0002): /* Behringer UFX1204 */ ep = 0x81; ifnum = 1; -- cgit v1.2.1 From 2ca371d847511f97ef991ef612a2ce805489840e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Rekowski?= Date: Mon, 19 Aug 2019 22:40:07 +0200 Subject: ALSA: hda/ca0132 - Add new SBZ quirk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a new PCI subsys ID for the SBZ, as found and tested by me and some reddit users. 
Link: https://lore.kernel.org/lkml/20190819204008.14426-1-p.rekowski@gmail.com Signed-off-by: Paweł Rekowski Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_ca0132.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 0d51823d7270..6d1fb7c11f17 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -1175,6 +1175,7 @@ static const struct snd_pci_quirk ca0132_quirks[] = { SND_PCI_QUIRK(0x1028, 0x0708, "Alienware 15 R2 2016", QUIRK_ALIENWARE), SND_PCI_QUIRK(0x1102, 0x0010, "Sound Blaster Z", QUIRK_SBZ), SND_PCI_QUIRK(0x1102, 0x0023, "Sound Blaster Z", QUIRK_SBZ), + SND_PCI_QUIRK(0x1102, 0x0027, "Sound Blaster Z", QUIRK_SBZ), SND_PCI_QUIRK(0x1102, 0x0033, "Sound Blaster ZxR", QUIRK_SBZ), SND_PCI_QUIRK(0x1458, 0xA016, "Recon3Di", QUIRK_R3DI), SND_PCI_QUIRK(0x1458, 0xA026, "Gigabyte G1.Sniper Z97", QUIRK_R3DI), -- cgit v1.2.1 From fcf887e7caaa813eea821d11bf2b7619a37df37a Mon Sep 17 00:00:00 2001 From: Aaron Armstrong Skomra Date: Fri, 16 Aug 2019 12:00:54 -0700 Subject: HID: wacom: correct misreported EKR ring values The EKR ring claims a range of 0 to 71 but actually reports values 1 to 72. The ring is used in relative mode so this change should not affect users. 
Signed-off-by: Aaron Armstrong Skomra Fixes: 72b236d60218f ("HID: wacom: Add support for Express Key Remote.") Cc: # v4.3+ Reviewed-by: Ping Cheng Reviewed-by: Jason Gerecke Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 7a9e229e6253..1713235d28cb 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1061,7 +1061,7 @@ static int wacom_remote_irq(struct wacom_wac *wacom_wac, size_t len) input_report_key(input, BTN_BASE2, (data[11] & 0x02)); if (data[12] & 0x80) - input_report_abs(input, ABS_WHEEL, (data[12] & 0x7f)); + input_report_abs(input, ABS_WHEEL, (data[12] & 0x7f) - 1); else input_report_abs(input, ABS_WHEEL, 0); -- cgit v1.2.1 From a180d023ec7ba0e43b2385876950d9ce7ab618f1 Mon Sep 17 00:00:00 2001 From: Nishka Dasgupta Date: Mon, 19 Aug 2019 13:21:26 +0530 Subject: auxdisplay: ht16k33: Make ht16k33_fb_fix and ht16k33_fb_var constant The static structures ht16k33_fb_fix and ht16k33_fb_var, of types fb_fix_screeninfo and fb_var_screeninfo respectively, are not used except to be copied into other variables. Hence make both of them constant to prevent unintended modification. Issue found with Coccinelle. 
Acked-by: Robin van der Gracht Signed-off-by: Nishka Dasgupta Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/ht16k33.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c index 9c0bb771751d..a2fcde582e2a 100644 --- a/drivers/auxdisplay/ht16k33.c +++ b/drivers/auxdisplay/ht16k33.c @@ -74,7 +74,7 @@ struct ht16k33_priv { struct ht16k33_fbdev fbdev; }; -static struct fb_fix_screeninfo ht16k33_fb_fix = { +static const struct fb_fix_screeninfo ht16k33_fb_fix = { .id = DRIVER_NAME, .type = FB_TYPE_PACKED_PIXELS, .visual = FB_VISUAL_MONO10, @@ -85,7 +85,7 @@ static struct fb_fix_screeninfo ht16k33_fb_fix = { .accel = FB_ACCEL_NONE, }; -static struct fb_var_screeninfo ht16k33_fb_var = { +static const struct fb_var_screeninfo ht16k33_fb_var = { .xres = HT16K33_MATRIX_LED_MAX_ROWS, .yres = HT16K33_MATRIX_LED_MAX_COLS, .xres_virtual = HT16K33_MATRIX_LED_MAX_ROWS, -- cgit v1.2.1 From 8f2d163cb26da87e7d8e1677368b8ba1ba4d30b3 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 18 Jul 2019 12:38:10 +0200 Subject: mt76: mt76x0u: do not reset radio on resume On some machines mt76x0u firmware can hang during resume, which results in messages like the ones below: [ 475.480062] mt76x0 1-8:1.0: Error: MCU response pre-completed! [ 475.990066] mt76x0 1-8:1.0: Error: send MCU cmd failed:-110 [ 475.990075] mt76x0 1-8:1.0: Error: MCU response pre-completed! [ 476.500003] mt76x0 1-8:1.0: Error: send MCU cmd failed:-110 [ 476.500012] mt76x0 1-8:1.0: Error: MCU response pre-completed! [ 477.010046] mt76x0 1-8:1.0: Error: send MCU cmd failed:-110 [ 477.010055] mt76x0 1-8:1.0: Error: MCU response pre-completed! [ 477.529997] mt76x0 1-8:1.0: Error: send MCU cmd failed:-110 [ 477.530006] mt76x0 1-8:1.0: Error: MCU response pre-completed! [ 477.824907] mt76x0 1-8:1.0: Error: send MCU cmd failed:-71 [ 477.824916] mt76x0 1-8:1.0: Error: MCU response pre-completed!
[ 477.825029] usb 1-8: USB disconnect, device number 6 and possible whole system freeze. This can be avoided, if we do not perform mt76x0_chip_onoff() reset. Cc: stable@vger.kernel.org Fixes: 134b2d0d1fcf ("mt76x0: init files") Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt76x0/usb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c index 627ed1fc7b15..645f4d15fb61 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c @@ -136,11 +136,11 @@ static const struct ieee80211_ops mt76x0u_ops = { .release_buffered_frames = mt76_release_buffered_frames, }; -static int mt76x0u_init_hardware(struct mt76x02_dev *dev) +static int mt76x0u_init_hardware(struct mt76x02_dev *dev, bool reset) { int err; - mt76x0_chip_onoff(dev, true, true); + mt76x0_chip_onoff(dev, true, reset); if (!mt76x02_wait_for_mac(&dev->mt76)) return -ETIMEDOUT; @@ -173,7 +173,7 @@ static int mt76x0u_register_device(struct mt76x02_dev *dev) if (err < 0) goto out_err; - err = mt76x0u_init_hardware(dev); + err = mt76x0u_init_hardware(dev, true); if (err < 0) goto out_err; @@ -309,7 +309,7 @@ static int __maybe_unused mt76x0_resume(struct usb_interface *usb_intf) if (ret < 0) goto err; - ret = mt76x0u_init_hardware(dev); + ret = mt76x0u_init_hardware(dev, false); if (ret) goto err; -- cgit v1.2.1 From 95844124385eae4bd9ca5f9514a0fc33d561ac3c Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 19 Aug 2019 13:20:07 +0200 Subject: rt2x00: clear IV's on start to fix AP mode regression To avoid breaking HW restart we should keep the initialization vector data. I assumed that on start the data is already initialized to zeros, but that is not true in some scenarios and we should clear it. So add an additional flag to check whether we are under HW restart and clear the IV data if we are not.
Patch fixes AP mode regression. Reported-and-tested-by: Emil Karlson Fixes: 710e6cc1595e ("rt2800: do not nullify initialization vector data") Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/ralink/rt2x00/rt2800lib.c | 9 +++++++++ drivers/net/wireless/ralink/rt2x00/rt2x00.h | 1 + drivers/net/wireless/ralink/rt2x00/rt2x00dev.c | 13 ++++++++----- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index c9b957ac5733..ecbe78b8027b 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -6094,6 +6094,15 @@ static int rt2800_init_registers(struct rt2x00_dev *rt2x00dev) rt2800_delete_wcid_attr(rt2x00dev, i); } + /* + * Clear encryption initialization vectors on start, but keep them + * for watchdog reset. Otherwise we will have wrong IVs and not be + * able to keep connections after reset. + */ + if (!test_bit(DEVICE_STATE_RESET, &rt2x00dev->flags)) + for (i = 0; i < 256; i++) + rt2800_register_write(rt2x00dev, MAC_IVEIV_ENTRY(i), 0); + /* * Clear all beacons */ diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00.h b/drivers/net/wireless/ralink/rt2x00/rt2x00.h index 7e43690a861c..2b216edd0c7d 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00.h @@ -658,6 +658,7 @@ enum rt2x00_state_flags { DEVICE_STATE_ENABLED_RADIO, DEVICE_STATE_SCANNING, DEVICE_STATE_FLUSHING, + DEVICE_STATE_RESET, /* * Driver configuration diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c index 35414f97a978..9d158237ac67 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c @@ -1256,13 +1256,14 @@ static int rt2x00lib_initialize(struct rt2x00_dev *rt2x00dev) int rt2x00lib_start(struct rt2x00_dev *rt2x00dev) { - int retval; + int 
retval = 0; if (test_bit(DEVICE_STATE_STARTED, &rt2x00dev->flags)) { /* * This is special case for ieee80211_restart_hw(), otherwise * mac80211 never call start() two times in row without stop(); */ + set_bit(DEVICE_STATE_RESET, &rt2x00dev->flags); rt2x00dev->ops->lib->pre_reset_hw(rt2x00dev); rt2x00lib_stop(rt2x00dev); } @@ -1273,14 +1274,14 @@ int rt2x00lib_start(struct rt2x00_dev *rt2x00dev) */ retval = rt2x00lib_load_firmware(rt2x00dev); if (retval) - return retval; + goto out; /* * Initialize the device. */ retval = rt2x00lib_initialize(rt2x00dev); if (retval) - return retval; + goto out; rt2x00dev->intf_ap_count = 0; rt2x00dev->intf_sta_count = 0; @@ -1289,11 +1290,13 @@ int rt2x00lib_start(struct rt2x00_dev *rt2x00dev) /* Enable the radio */ retval = rt2x00lib_enable_radio(rt2x00dev); if (retval) - return retval; + goto out; set_bit(DEVICE_STATE_STARTED, &rt2x00dev->flags); - return 0; +out: + clear_bit(DEVICE_STATE_RESET, &rt2x00dev->flags); + return retval; } void rt2x00lib_stop(struct rt2x00_dev *rt2x00dev) -- cgit v1.2.1 From 50f5604476b2bd728910b2e1803a6eafd0eeaf3d Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Fri, 16 Aug 2019 15:55:51 +0300 Subject: iwlwifi: mvm: Allow multicast data frames only when associated The MAC context configuration always allowed multicast data frames to pass to the driver for all MAC context types, and in the case of station MAC context both when associated and when not associated. One of the outcomes of this configuration is having the FW forward encrypted multicast frames to the driver with Rx status indicating that the frame was not decrypted (as expected, since no keys were configured yet) which in turn results with unnecessary error messages. Change this behavior to allow multicast data frames only when they are actually expected, e.g., station MAC context is associated etc. 
Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 33 ++++++++++++++++++++--- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 10 +++++++ 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index cb22d447fcb8..fe776e35b9d0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -554,7 +554,7 @@ static void iwl_mvm_mac_ctxt_cmd_common(struct iwl_mvm *mvm, cpu_to_le32(vif->bss_conf.use_short_slot ? MAC_FLG_SHORT_SLOT : 0); - cmd->filter_flags = cpu_to_le32(MAC_FILTER_ACCEPT_GRP); + cmd->filter_flags = 0; for (i = 0; i < IEEE80211_NUM_ACS; i++) { u8 txf = iwl_mvm_mac_ac_to_tx_fifo(mvm, i); @@ -623,6 +623,8 @@ static int iwl_mvm_mac_ctxt_cmd_sta(struct iwl_mvm *mvm, /* We need the dtim_period to set the MAC as associated */ if (vif->bss_conf.assoc && vif->bss_conf.dtim_period && !force_assoc_off) { + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + u8 ap_sta_id = mvmvif->ap_sta_id; u32 dtim_offs; /* @@ -658,6 +660,29 @@ static int iwl_mvm_mac_ctxt_cmd_sta(struct iwl_mvm *mvm, dtim_offs); ctxt_sta->is_assoc = cpu_to_le32(1); + + /* + * allow multicast data frames only as long as the station is + * authorized, i.e., GTK keys are already installed (if needed) + */ + if (ap_sta_id < IWL_MVM_STATION_COUNT) { + struct ieee80211_sta *sta; + + rcu_read_lock(); + + sta = rcu_dereference(mvm->fw_id_to_mac_id[ap_sta_id]); + if (!IS_ERR_OR_NULL(sta)) { + struct iwl_mvm_sta *mvmsta = + iwl_mvm_sta_from_mac80211(sta); + + if (mvmsta->sta_state == + IEEE80211_STA_AUTHORIZED) + cmd.filter_flags |= + cpu_to_le32(MAC_FILTER_ACCEPT_GRP); + } + + rcu_read_unlock(); + } } else { ctxt_sta->is_assoc = cpu_to_le32(0); @@ -703,7 +728,8 @@ static int iwl_mvm_mac_ctxt_cmd_listener(struct iwl_mvm *mvm, 
MAC_FILTER_IN_CONTROL_AND_MGMT | MAC_FILTER_IN_BEACON | MAC_FILTER_IN_PROBE_REQUEST | - MAC_FILTER_IN_CRC32); + MAC_FILTER_IN_CRC32 | + MAC_FILTER_ACCEPT_GRP); ieee80211_hw_set(mvm->hw, RX_INCLUDES_FCS); /* Allocate sniffer station */ @@ -727,7 +753,8 @@ static int iwl_mvm_mac_ctxt_cmd_ibss(struct iwl_mvm *mvm, iwl_mvm_mac_ctxt_cmd_common(mvm, vif, &cmd, NULL, action); cmd.filter_flags = cpu_to_le32(MAC_FILTER_IN_BEACON | - MAC_FILTER_IN_PROBE_REQUEST); + MAC_FILTER_IN_PROBE_REQUEST | + MAC_FILTER_ACCEPT_GRP); /* cmd.ibss.beacon_time/cmd.ibss.beacon_tsf are curently ignored */ cmd.ibss.bi = cpu_to_le32(vif->bss_conf.beacon_int); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 1c904b5226aa..a7bc00d1296f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -3327,10 +3327,20 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw, /* enable beacon filtering */ WARN_ON(iwl_mvm_enable_beacon_filter(mvm, vif, 0)); + /* + * Now that the station is authorized, i.e., keys were already + * installed, need to indicate to the FW that + * multicast data frames can be forwarded to the driver + */ + iwl_mvm_mac_ctxt_changed(mvm, vif, false, NULL); + iwl_mvm_rs_rate_init(mvm, sta, mvmvif->phy_ctxt->channel->band, true); } else if (old_state == IEEE80211_STA_AUTHORIZED && new_state == IEEE80211_STA_ASSOC) { + /* Multicast data frames are no longer allowed */ + iwl_mvm_mac_ctxt_changed(mvm, vif, false, NULL); + /* disable beacon filtering */ ret = iwl_mvm_disable_beacon_filter(mvm, vif, 0); WARN_ON(ret && -- cgit v1.2.1 From 884b75696873f5338c57a2613763ea8f37b4e26b Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 16 Aug 2019 15:55:52 +0300 Subject: iwlwifi: pcie: fix the byte count table format for 22560 devices Starting from 22560, the byte count is expected to be in bytes and we now have 14 bits. Adjust the code to this.
Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 38d110338987..9ef6b8fe03c1 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -99,10 +99,7 @@ void iwl_pcie_gen2_update_byte_tbl(struct iwl_trans_pcie *trans_pcie, u16 len = byte_cnt; __le16 bc_ent; - if (trans_pcie->bc_table_dword) - len = DIV_ROUND_UP(len, 4); - - if (WARN_ON(len > 0xFFF || idx >= txq->n_window)) + if (WARN(idx >= txq->n_window, "%d >= %d\n", idx, txq->n_window)) return; filled_tfd_size = offsetof(struct iwl_tfh_tfd, tbs) + @@ -117,11 +114,20 @@ void iwl_pcie_gen2_update_byte_tbl(struct iwl_trans_pcie *trans_pcie, */ num_fetch_chunks = DIV_ROUND_UP(filled_tfd_size, 64) - 1; - bc_ent = cpu_to_le16(len | (num_fetch_chunks << 12)); - if (trans->cfg->device_family >= IWL_DEVICE_FAMILY_22560) + if (trans->cfg->device_family >= IWL_DEVICE_FAMILY_22560) { + /* Starting from 22560, the HW expects bytes */ + WARN_ON(trans_pcie->bc_table_dword); + WARN_ON(len > 0x3FFF); + bc_ent = cpu_to_le16(len | (num_fetch_chunks << 14)); scd_bc_tbl_gen3->tfd_offset[idx] = bc_ent; - else + } else { + /* Until 22560, the HW expects DW */ + WARN_ON(!trans_pcie->bc_table_dword); + len = DIV_ROUND_UP(len, 4); + WARN_ON(len > 0xFFF); + bc_ent = cpu_to_le16(len | (num_fetch_chunks << 12)); scd_bc_tbl->tfd_offset[idx] = bc_ent; + } } /* -- cgit v1.2.1 From 17e40e6979aaf60f356331bac129df20e1fd74a0 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 16 Aug 2019 15:55:53 +0300 Subject: iwlwifi: pcie: don't switch FW to qnj when ax201 is detected We have a too generic condition that switches from Qu configurations to QnJ configurations. 
We need to exclude some configurations so that they are not erroneously switched. Add the ax201 configuration to the list of exclusions. Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index f5df5b370d78..935e35dafce5 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3603,6 +3603,7 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR) && ((trans->cfg != &iwl_ax200_cfg_cc && + trans->cfg != &iwl_ax201_cfg_qu_hr && trans->cfg != &killer1650x_2ax_cfg && trans->cfg != &killer1650w_2ax_cfg && trans->cfg != &iwl_ax201_cfg_quz_hr) || -- cgit v1.2.1 From 5a8c31aa63578cb0ff390a57537f1cb4b312a1ed Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 16 Aug 2019 15:55:54 +0300 Subject: iwlwifi: pcie: fix recognition of QuZ devices If the HW revision of Qu devices we found is QuZ, then we need to switch the configuration accordingly in order to use the correct FW. Add a block of ifs in order to do that. Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index de711c1160d3..7c5aaeaf7fe5 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1063,6 +1063,23 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) else if (iwl_trans->cfg == &iwl9560_2ac_160_cfg_qu_b0_jf_b0) iwl_trans->cfg = &iwl9560_2ac_160_cfg_qu_c0_jf_b0; } + + /* same thing for QuZ...
*/ + if (iwl_trans->hw_rev == CSR_HW_REV_TYPE_QUZ) { + if (cfg == &iwl_ax101_cfg_qu_hr) + cfg = &iwl_ax101_cfg_quz_hr; + else if (cfg == &iwl_ax201_cfg_qu_hr) + cfg = &iwl_ax201_cfg_quz_hr; + else if (cfg == &iwl9461_2ac_cfg_qu_b0_jf_b0) + cfg = &iwl9461_2ac_cfg_quz_a0_jf_b0_soc; + else if (cfg == &iwl9462_2ac_cfg_qu_b0_jf_b0) + cfg = &iwl9462_2ac_cfg_quz_a0_jf_b0_soc; + else if (cfg == &iwl9560_2ac_cfg_qu_b0_jf_b0) + cfg = &iwl9560_2ac_cfg_quz_a0_jf_b0_soc; + else if (cfg == &iwl9560_2ac_160_cfg_qu_b0_jf_b0) + cfg = &iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc; + } + #endif pci_set_drvdata(pdev, iwl_trans); -- cgit v1.2.1 From fb89c39455e4b49881c5a42761bd71f03d3ef888 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Thu, 15 Aug 2019 23:56:35 +0300 Subject: xdp: unpin xdp umem pages in error path Fix mem leak caused by missed unpin routine for umem pages. Fixes: 8aef7340ae9695 ("xsk: introduce xdp_umem_page") Signed-off-by: Ivan Khoronzhuk Acked-by: Jonathan Lemon Signed-off-by: Daniel Borkmann --- net/xdp/xdp_umem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 83de74ca729a..688aac7a6943 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -365,7 +365,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL); if (!umem->pages) { err = -ENOMEM; - goto out_account; + goto out_pin; } for (i = 0; i < umem->npgs; i++) @@ -373,6 +373,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) return 0; +out_pin: + xdp_umem_unpin_pages(umem); out_account: xdp_umem_unaccount_pages(umem); return err; -- cgit v1.2.1 From 16c75963723dfd8d7ca719527052f16be7258a23 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Tue, 20 Aug 2019 03:06:40 +0000 Subject: Drivers: hv: vmbus: Remove the unused "tsc_page" from struct hv_context This field is no longer used after the commit 63ed4e0c67df ("Drivers: hv: vmbus: 
Consolidate all Hyper-V specific clocksource code") , because it's replaced by the global variable "struct ms_hyperv_tsc_page *tsc_pg;" (now, the variable is in drivers/clocksource/hyperv_timer.c). Fixes: 63ed4e0c67df ("Drivers: hv: vmbus: Consolidate all Hyper-V specific clocksource code") Signed-off-by: Dexuan Cui Signed-off-by: Sasha Levin --- drivers/hv/hyperv_vmbus.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 362e70e9d145..fb16a622e8ab 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -146,8 +146,6 @@ struct hv_context { */ u64 guestid; - void *tsc_page; - struct hv_per_cpu_context __percpu *cpu_context; /* -- cgit v1.2.1 From d09bc83640d524b8467a660db7b1d15e6562a1de Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Tue, 20 Aug 2019 03:01:23 +0000 Subject: Input: hyperv-keyboard: Use in-place iterator API in the channel callback Simplify the ring buffer handling with the in-place API. Also avoid the dynamic allocation and the memory leak in the channel callback function. 
Signed-off-by: Dexuan Cui Acked-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/serio/hyperv-keyboard.c | 35 ++++++----------------------------- 1 file changed, 6 insertions(+), 29 deletions(-) diff --git a/drivers/input/serio/hyperv-keyboard.c b/drivers/input/serio/hyperv-keyboard.c index 88ae7c2ac3c8..e486a8a74c40 100644 --- a/drivers/input/serio/hyperv-keyboard.c +++ b/drivers/input/serio/hyperv-keyboard.c @@ -237,40 +237,17 @@ static void hv_kbd_handle_received_packet(struct hv_device *hv_dev, static void hv_kbd_on_channel_callback(void *context) { + struct vmpacket_descriptor *desc; struct hv_device *hv_dev = context; - void *buffer; - int bufferlen = 0x100; /* Start with sensible size */ u32 bytes_recvd; u64 req_id; - int error; - buffer = kmalloc(bufferlen, GFP_ATOMIC); - if (!buffer) - return; - - while (1) { - error = vmbus_recvpacket_raw(hv_dev->channel, buffer, bufferlen, - &bytes_recvd, &req_id); - switch (error) { - case 0: - if (bytes_recvd == 0) { - kfree(buffer); - return; - } - - hv_kbd_handle_received_packet(hv_dev, buffer, - bytes_recvd, req_id); - break; + foreach_vmbus_pkt(desc, hv_dev->channel) { + bytes_recvd = desc->len8 * 8; + req_id = desc->trans_id; - case -ENOBUFS: - kfree(buffer); - /* Handle large packet */ - bufferlen = bytes_recvd; - buffer = kmalloc(bytes_recvd, GFP_ATOMIC); - if (!buffer) - return; - break; - } + hv_kbd_handle_received_packet(hv_dev, desc, bytes_recvd, + req_id); } } -- cgit v1.2.1 From 89eb4d8d25722a0a0194cf7fa47ba602e32a6da7 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 19 Aug 2019 16:44:09 +0200 Subject: Tools: hv: kvp: eliminate 'may be used uninitialized' warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building hv_kvp_daemon GCC-8.3 complains: hv_kvp_daemon.c: In function ‘kvp_get_ip_info.constprop’: hv_kvp_daemon.c:812:30: warning: ‘ip_buffer’ may be used uninitialized in this function [-Wmaybe-uninitialized] struct 
hv_kvp_ipaddr_value *ip_buffer; this seems to be a false positive: we only use ip_buffer when op == KVP_OP_GET_IP_INFO and it is only unset when op == KVP_OP_ENUMERATE. Silence the warning by initializing ip_buffer to NULL. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Sasha Levin --- tools/hv/hv_kvp_daemon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index f5597503c771..e9ef4ca6a655 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -809,7 +809,7 @@ kvp_get_ip_info(int family, char *if_name, int op, int sn_offset = 0; int error = 0; char *buffer; - struct hv_kvp_ipaddr_value *ip_buffer; + struct hv_kvp_ipaddr_value *ip_buffer = NULL; char cidr_mask[5]; /* /xyz */ int weight; int i; -- cgit v1.2.1 From a9fc4340aee041dd186d1fb8f1b5d1e9caf28212 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Tue, 7 May 2019 07:46:55 +0000 Subject: Drivers: hv: vmbus: Fix virt_to_hvpfn() for X86_PAE In the case of X86_PAE, unsigned long is u32, but the physical address type should be u64. Due to the bug here, the netvsc driver can not load successfully, and sometimes the VM can panic due to memory corruption (the hypervisor writes data to the wrong location). 
Fixes: 6ba34171bcbd ("Drivers: hv: vmbus: Remove use of slow_virt_to_phys()") Cc: stable@vger.kernel.org Cc: Michael Kelley Reported-and-tested-by: Juliana Rodrigueiro Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Sasha Levin --- drivers/hv/channel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 5f9505a087f6..23f358cb7f49 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -26,7 +26,7 @@ static unsigned long virt_to_hvpfn(void *addr) { - unsigned long paddr; + phys_addr_t paddr; if (is_vmalloc_addr(addr)) paddr = page_to_phys(vmalloc_to_page(addr)) + -- cgit v1.2.1 From 500f9fbadef86466a435726192f4ca4df7d94236 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 19 Aug 2019 12:15:59 -0600 Subject: io_uring: fix potential hang with polled IO If a request issue ends up being punted to async context to avoid blocking, we can get into a situation where the original application enters the poll loop for that very request before it has been issued. This should not be an issue, except that the polling will hold the io_uring uring_ctx mutex for the duration of the poll. When the async worker has actually issued the request, it needs to acquire this mutex to add the request to the poll issued list. Since the application polling is already holding this mutex, the workqueue sleeps on the mutex forever, and the application thus never gets a chance to poll for the very request it was interested in. Fix this by ensuring that the polling drops the uring_ctx occasionally if it's not making any progress. Reported-by: Jeffrey M. 
Birnbaum Signed-off-by: Jens Axboe --- fs/io_uring.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 24bbe3cb7ad4..36f04d0b197b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -805,11 +805,34 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx) static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, long min) { - int ret = 0; + int iters, ret = 0; + + /* + * We disallow the app entering submit/complete with polling, but we + * still need to lock the ring to prevent racing with polled issue + * that got punted to a workqueue. + */ + mutex_lock(&ctx->uring_lock); + iters = 0; do { int tmin = 0; + /* + * If a submit got punted to a workqueue, we can have the + * application entering polling for a command before it gets + * issued. That app will hold the uring_lock for the duration + * of the poll right here, so we need to take a breather every + * now and then to ensure that the issue has a chance to add + * the poll to the issued list. Otherwise we can spin here + * forever, while the workqueue is stuck trying to acquire the + * very same mutex. + */ + if (!(++iters & 7)) { + mutex_unlock(&ctx->uring_lock); + mutex_lock(&ctx->uring_lock); + } + if (*nr_events < min) tmin = min - *nr_events; @@ -819,6 +842,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, ret = 0; } while (min && !*nr_events && !need_resched()); + mutex_unlock(&ctx->uring_lock); return ret; } @@ -2280,15 +2304,7 @@ static int io_sq_thread(void *data) unsigned nr_events = 0; if (ctx->flags & IORING_SETUP_IOPOLL) { - /* - * We disallow the app entering submit/complete - * with polling, but we still need to lock the - * ring to prevent racing with polled issue - * that got punted to a workqueue. 
- */ - mutex_lock(&ctx->uring_lock); io_iopoll_check(ctx, &nr_events, 0); - mutex_unlock(&ctx->uring_lock); } else { /* * Normal IO, just pretend everything completed. @@ -3190,9 +3206,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, min_complete = min(min_complete, ctx->cq_entries); if (ctx->flags & IORING_SETUP_IOPOLL) { - mutex_lock(&ctx->uring_lock); ret = io_iopoll_check(ctx, &nr_events, min_complete); - mutex_unlock(&ctx->uring_lock); } else { ret = io_cqring_wait(ctx, min_complete, sig, sigsz); } -- cgit v1.2.1 From 504db087aaccdb32af61539916409f7dca31ceb5 Mon Sep 17 00:00:00 2001 From: Anton Eidelman Date: Mon, 12 Aug 2019 23:00:36 +0300 Subject: nvme-multipath: fix possible I/O hang when paths are updated nvme_state_set_live() making a path available triggers requeue_work in order to resubmit requests that ended up on requeue_list when no paths were available. This requeue_work may race with concurrent nvme_ns_head_make_request() that do not observe the live path yet. Such concurrent requests may be made by either: - New IO submission. - Requeue_work triggered by nvme_failover_req() or another ana_work. A race may cause requeue_work to capture the state of requeue_list before more requests get onto the list. These requests will stay on the list forever unless requeue_work is triggered again. In order to prevent such a race, nvme_state_set_live() should synchronize_srcu(&head->srcu) before triggering the requeue_work and prevent nvme_ns_head_make_request referencing an old snapshot of the path list.
Reviewed-by: Christoph Hellwig Signed-off-by: Anton Eidelman Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/multipath.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 888d4543894e..af831d3d15d0 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -428,6 +428,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) srcu_read_unlock(&head->srcu, srcu_idx); } + synchronize_srcu(&ns->head->srcu); kblockd_schedule_work(&ns->head->requeue_work); } -- cgit v1.2.1 From a89fcca8185633993018dc081d6b021d005e6d0b Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Wed, 14 Aug 2019 11:26:10 -0300 Subject: nvme: Fix cntlid validation when not using NVMEoF Commit 1b1031ca63b2 ("nvme: validate cntlid during controller initialisation") introduced a validation for controllers with duplicate cntlid that runs on nvme_init_subsystem(). The problem is that the validation relies on ctrl->cntlid, and this value is assigned (from id_ctrl value) after the call for nvme_init_subsystem() in nvme_init_identify() for non-fabrics scenario. That leads to ctrl->cntlid always being 0 in case we have a physical set of controllers in the same subsystem. This patch fixes that by loading the discovered cntlid id_ctrl value into ctrl->cntlid before the subsystem initialization, only for the non-fabrics case. The patch was tested with emulated nvme devices (qemu) having two controllers in a single subsystem. Without the patch, we couldn't make it work failing in the duplicate check; when running with the patch, we could see the subsystem holding both controllers. For the fabrics case we see ctrl->cntlid has a more intricate relation with the admin connect, so we didn't change that. Fixes: 1b1031ca63b2 ("nvme: validate cntlid during controller initialisation") Signed-off-by: Guilherme G. 
Piccoli Reviewed-by: Sagi Grimberg Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c258a1ce4b28..fea83fd95252 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2597,6 +2597,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) goto out_free; } + if (!(ctrl->ops->flags & NVME_F_FABRICS)) + ctrl->cntlid = le16_to_cpu(id->cntlid); + if (!ctrl->identified) { int i; @@ -2697,7 +2700,6 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) goto out_free; } } else { - ctrl->cntlid = le16_to_cpu(id->cntlid); ctrl->hmpre = le32_to_cpu(id->hmpre); ctrl->hmmin = le32_to_cpu(id->hmmin); ctrl->hmminds = le32_to_cpu(id->hmminds); -- cgit v1.2.1 From cb32de1b7e2591f844f18a5513fde8e2bd49bce0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 16 Aug 2019 15:16:19 -0500 Subject: nvme: Add quirk for LiteON CL1 devices running FW 22301111 One of the components in LiteON CL1 device has limitations that can be encountered based upon boundary race conditions using the nvme bus specific suspend to idle flow. When this situation occurs the drive doesn't resume properly from suspend-to-idle. LiteON has confirmed this problem and fixed in the next firmware version. As this firmware is already in the field, avoid running nvme specific suspend to idle flow. 
Fixes: d916b1be94b6 ("nvme-pci: use host managed power state for suspend") Link: http://lists.infradead.org/pipermail/linux-nvme/2019-July/thread.html Signed-off-by: Mario Limonciello Signed-off-by: Charles Hyde Reviewed-by: Keith Busch Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 10 ++++++++++ drivers/nvme/host/nvme.h | 5 +++++ drivers/nvme/host/pci.c | 3 ++- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fea83fd95252..d3d6b7bd6903 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2257,6 +2257,16 @@ static const struct nvme_core_quirk_entry core_quirks[] = { .vid = 0x1179, .mn = "THNSF5256GPUK TOSHIBA", .quirks = NVME_QUIRK_NO_APST, + }, + { + /* + * This LiteON CL1-3D*-Q11 firmware version has a race + * condition associated with actions related to suspend to idle + * LiteON has resolved the problem in future firmware + */ + .vid = 0x14a4, + .fr = "22301111", + .quirks = NVME_QUIRK_SIMPLE_SUSPEND, } }; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 778b3a0b6adb..2d678fb968c7 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -92,6 +92,11 @@ enum nvme_quirks { * Broken Write Zeroes. */ NVME_QUIRK_DISABLE_WRITE_ZEROES = (1 << 9), + + /* + * Force simple suspend/resume path. + */ + NVME_QUIRK_SIMPLE_SUSPEND = (1 << 10), }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6bd9b1033965..732d5b63ec05 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2876,7 +2876,8 @@ static int nvme_suspend(struct device *dev) * state (which may not be possible if the link is up). 
*/ if (pm_suspend_via_firmware() || !ctrl->npss || - !pcie_aspm_enabled(pdev)) { + !pcie_aspm_enabled(pdev) || + (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND)) { nvme_dev_disable(ndev, true); return 0; } -- cgit v1.2.1 From a3a0e43fd77013819e4b6f55e37e0efe8e35d805 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 20 Aug 2019 11:03:11 -0600 Subject: io_uring: don't enter poll loop if we have CQEs pending We need to check if we have CQEs pending before starting a poll loop, as those could be the events we will be spinning for (and hence we'll find none). This can happen if a CQE triggers an error, or if it is found by eg an IRQ before we get a chance to find it through polling. Signed-off-by: Jens Axboe --- fs/io_uring.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 36f04d0b197b..e7a43a354d91 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -679,6 +679,13 @@ static void io_put_req(struct io_kiocb *req) io_free_req(req); } +static unsigned io_cqring_events(struct io_cq_ring *ring) +{ + /* See comment at the top of this file */ + smp_rmb(); + return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head); +} + /* * Find and free completed poll iocbs */ @@ -818,6 +825,14 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, do { int tmin = 0; + /* + * Don't enter poll loop if we already have events pending. + * If we do, we can potentially be spinning for commands that + * already triggered a CQE (eg in error). 
+ */ + if (io_cqring_events(ctx->cq_ring)) + break; + /* * If a submit got punted to a workqueue, we can have the * application entering polling for a command before it gets @@ -2449,13 +2464,6 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) return submit; } -static unsigned io_cqring_events(struct io_cq_ring *ring) -{ - /* See comment at the top of this file */ - smp_rmb(); - return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head); -} - /* * Wait until events become available, if we don't already have some. The * application must reap them itself, as they reside on the shared cq ring. -- cgit v1.2.1 From 27b7fb1ab7bfad45f5702ff0c78a4822a41b1456 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 15 Aug 2019 11:38:30 +0300 Subject: RDMA/mlx5: Fix MR npages calculation for IB_ACCESS_HUGETLB When ODP is enabled with IB_ACCESS_HUGETLB then the required pages should be calculated based on the extent of the MR, which is rounded to the nearest huge page alignment. 
Fixes: d2183c6f1958 ("RDMA/umem: Move page_shift from ib_umem to ib_odp_umem") Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190815083834.9245-5-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/core/umem.c | 7 +------ drivers/infiniband/hw/mlx5/mem.c | 5 +++-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 08da840ed7ee..56553668256f 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -379,14 +379,9 @@ EXPORT_SYMBOL(ib_umem_release); int ib_umem_page_count(struct ib_umem *umem) { - int i; - int n; + int i, n = 0; struct scatterlist *sg; - if (umem->is_odp) - return ib_umem_num_pages(umem); - - n = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) n += sg_dma_len(sg) >> PAGE_SHIFT; diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index fe1a76d8531c..a40e0abf2338 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -57,9 +57,10 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int entry; if (umem->is_odp) { - unsigned int page_shift = to_ib_umem_odp(umem)->page_shift; + struct ib_umem_odp *odp = to_ib_umem_odp(umem); + unsigned int page_shift = odp->page_shift; - *ncont = ib_umem_page_count(umem); + *ncont = ib_umem_odp_num_pages(odp); *count = *ncont << (page_shift - PAGE_SHIFT); *shift = page_shift; if (order) -- cgit v1.2.1 From 9b440078017f194e56eaae3ac32f333f420c5c4e Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Mon, 19 Aug 2019 16:02:57 +0200 Subject: RDMA/siw: Fix potential NULL de-ref In siw_connect() we have an error flow where there is no valid qp pointer. Make sure we don't try to de-ref in that situation. 
Fixes: 6c52fdc244b5 ("rdma/siw: connection management") Reported-by: Dan Carpenter Signed-off-by: Bernard Metzler Link: https://lore.kernel.org/r/20190819140257.19319-1-bmt@zurich.ibm.com Signed-off-by: Doug Ledford --- drivers/infiniband/sw/siw/siw_cm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 9ce8a1b925d2..fc97571a640b 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1515,7 +1515,7 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) } } error: - siw_dbg_qp(qp, "failed: %d\n", rv); + siw_dbg(id->device, "failed: %d\n", rv); if (cep) { siw_socket_disassoc(s); @@ -1540,7 +1540,8 @@ error: } else if (s) { sock_release(s); } - siw_qp_put(qp); + if (qp) + siw_qp_put(qp); return rv; } -- cgit v1.2.1 From d58c1834bf0d218a0bc00f8fb44874551b21da84 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Thu, 15 Aug 2019 15:20:33 -0400 Subject: IB/hfi1: Drop stale TID RDMA packets In a congested fabric with adaptive routing enabled, traces show that the sender could receive stale TID RDMA NAK packets that contain newer KDETH PSNs and older Verbs PSNs. If not dropped, these packets could cause the incorrect rewinding of the software flows and the incorrect completion of TID RDMA WRITE requests, eventually leading to memory corruption and kernel crash. The current code drops stale TID RDMA ACK/NAK packets solely based on KDETH PSNs, which may lead to erroneous processing. This patch fixes the issue by also checking the Verbs PSN. Additional checks are added before rewinding the TID RDMA WRITE DATA packets.
Fixes: 9e93e967f7b4 ("IB/hfi1: Add a function to receive TID RDMA ACK packet") Cc: Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/20190815192033.105923.44192.stgit@awfm-01.aw.intel.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/tid_rdma.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 996fc298207e..94070144fef5 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -4509,7 +4509,7 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) struct rvt_swqe *wqe; struct tid_rdma_request *req; struct tid_rdma_flow *flow; - u32 aeth, psn, req_psn, ack_psn, resync_psn, ack_kpsn; + u32 aeth, psn, req_psn, ack_psn, flpsn, resync_psn, ack_kpsn; unsigned long flags; u16 fidx; @@ -4538,6 +4538,9 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) ack_kpsn--; } + if (unlikely(qp->s_acked == qp->s_tail)) + goto ack_op_err; + wqe = rvt_get_swqe_ptr(qp, qp->s_acked); if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE) @@ -4550,7 +4553,8 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow); /* Drop stale ACK/NAK */ - if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0) + if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0 || + cmp_psn(req_psn, flow->flow_state.resp_ib_psn) < 0) goto ack_op_err; while (cmp_psn(ack_kpsn, @@ -4712,7 +4716,12 @@ done: switch ((aeth >> IB_AETH_CREDIT_SHIFT) & IB_AETH_CREDIT_MASK) { case 0: /* PSN sequence error */ + if (!req->flows) + break; flow = &req->flows[req->acked_tail]; + flpsn = full_flow_psn(flow, flow->flow_state.lpsn); + if (cmp_psn(psn, flpsn) > 0) + break; trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow); req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2])); -- cgit v1.2.1 From 
35d5c8b82e2c32e8e29ca195bb4dac60ba7d97fc Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Thu, 15 Aug 2019 15:20:39 -0400 Subject: IB/hfi1: Unsafe PSN checking for TID RDMA READ Resp packet When processing a TID RDMA READ RESP packet that causes KDETH EFLAGS errors, the packet's IB PSN is checked against qp->s_last_psn and qp->s_psn without the protection of qp->s_lock, which is not safe. This patch fixes the issue by acquiring qp->s_lock first. Fixes: 9905bf06e890 ("IB/hfi1: Add functions to receive TID RDMA READ response") Cc: Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/20190815192039.105923.7852.stgit@awfm-01.aw.intel.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/tid_rdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 94070144fef5..01c8b0280700 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -2687,12 +2687,12 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, u32 fpsn; lockdep_assert_held(&qp->r_lock); + spin_lock(&qp->s_lock); /* If the psn is out of valid range, drop the packet */ if (cmp_psn(ibpsn, qp->s_last_psn) < 0 || cmp_psn(ibpsn, qp->s_psn) > 0) - return ret; + goto s_unlock; - spin_lock(&qp->s_lock); /* * Note that NAKs implicitly ACK outstanding SEND and RDMA write * requests and implicitly NAK RDMA read and atomic requests issued -- cgit v1.2.1 From a8adbf7d0d0a6e3bf7f99da461a06039364e028b Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Thu, 15 Aug 2019 15:20:45 -0400 Subject: IB/hfi1: Add additional checks when handling TID RDMA READ RESP packet In a congested fabric with adaptive routing enabled, traces show that packets could be delivered out of order, which could cause incorrect processing of stale packets. 
For stale TID RDMA READ RESP packets that cause KDETH EFLAGS errors, this patch adds additional checks before processing the packets. Fixes: 9905bf06e890 ("IB/hfi1: Add functions to receive TID RDMA READ response") Cc: Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/20190815192045.105923.59813.stgit@awfm-01.aw.intel.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/tid_rdma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 01c8b0280700..23bb2498e5b4 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -2740,9 +2740,12 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, wqe = do_rc_completion(qp, wqe, ibp); if (qp->s_acked == qp->s_tail) - break; + goto s_unlock; } + if (qp->s_acked == qp->s_tail) + goto s_unlock; + /* Handle the eflags for the request */ if (wqe->wr.opcode != IB_WR_TID_RDMA_READ) goto s_unlock; -- cgit v1.2.1 From 90fdae66e72bf0381d168f12dca0259617927895 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Thu, 15 Aug 2019 15:20:51 -0400 Subject: IB/hfi1: Add additional checks when handling TID RDMA WRITE DATA packet In a congested fabric with adaptive routing enabled, traces show that packets could be delivered out of order, which could cause incorrect processing of stale packets. For stale TID RDMA WRITE DATA packets that cause KDETH EFLAGS errors, this patch adds additional checks before processing the packets. 
Fixes: d72fe7d5008b ("IB/hfi1: Add a function to receive TID RDMA WRITE DATA packet") Cc: Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/20190815192051.105923.69979.stgit@awfm-01.aw.intel.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/tid_rdma.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 23bb2498e5b4..7bccb59d8a30 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -2945,8 +2945,15 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, */ spin_lock(&qp->s_lock); qpriv = qp->priv; + if (qpriv->r_tid_tail == HFI1_QP_WQE_INVALID || + qpriv->r_tid_tail == qpriv->r_tid_head) + goto unlock; e = &qp->s_ack_queue[qpriv->r_tid_tail]; + if (e->opcode != TID_OP(WRITE_REQ)) + goto unlock; req = ack_to_tid_req(e); + if (req->comp_seg == req->cur_seg) + goto unlock; flow = &req->flows[req->clear_tail]; trace_hfi1_eflags_err_write(qp, rcv_type, rte, psn); trace_hfi1_rsp_handle_kdeth_eflags(qp, psn); -- cgit v1.2.1 From d9d1f5e7bb82415591e8b62b222cbb88c4797ef3 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Thu, 15 Aug 2019 15:20:58 -0400 Subject: IB/hfi1: Drop stale TID RDMA packets that cause TIDErr In a congested fabric with adaptive routing enabled, traces show that packets could be delivered out of order. A stale TID RDMA data packet could lead to TidErr if the TID entries have been released by duplicate data packets generated from retries, and subsequently erroneously force the qp into error state in the current implementation. Since the payload has already been dropped by hardware, the packet can be simply dropped and it is no longer necessary to put the qp into error state. 
Fixes: 9905bf06e890 ("IB/hfi1: Add functions to receive TID RDMA READ response") Cc: Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/20190815192058.105923.72324.stgit@awfm-01.aw.intel.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/tid_rdma.c | 47 +++-------------------------------- 1 file changed, 3 insertions(+), 44 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 7bccb59d8a30..6141f4edc6bf 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -2574,18 +2574,9 @@ void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp) hfi1_kern_clear_hw_flow(priv->rcd, qp); } -static bool tid_rdma_tid_err(struct hfi1_ctxtdata *rcd, - struct hfi1_packet *packet, u8 rcv_type, - u8 opcode) +static bool tid_rdma_tid_err(struct hfi1_packet *packet, u8 rcv_type) { struct rvt_qp *qp = packet->qp; - struct hfi1_qp_priv *qpriv = qp->priv; - u32 ipsn; - struct ib_other_headers *ohdr = packet->ohdr; - struct rvt_ack_entry *e; - struct tid_rdma_request *req; - struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); - u32 i; if (rcv_type >= RHF_RCV_TYPE_IB) goto done; @@ -2602,41 +2593,9 @@ static bool tid_rdma_tid_err(struct hfi1_ctxtdata *rcd, if (rcv_type == RHF_RCV_TYPE_EAGER) { hfi1_restart_rc(qp, qp->s_last_psn + 1, 1); hfi1_schedule_send(qp); - goto done_unlock; } - /* - * For TID READ response, error out QP after freeing the tid - * resources. 
- */ - if (opcode == TID_OP(READ_RESP)) { - ipsn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn)); - if (cmp_psn(ipsn, qp->s_last_psn) > 0 && - cmp_psn(ipsn, qp->s_psn) < 0) { - hfi1_kern_read_tid_flow_free(qp); - spin_unlock(&qp->s_lock); - rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); - goto done; - } - goto done_unlock; - } - - /* - * Error out the qp for TID RDMA WRITE - */ - hfi1_kern_clear_hw_flow(qpriv->rcd, qp); - for (i = 0; i < rvt_max_atomic(rdi); i++) { - e = &qp->s_ack_queue[i]; - if (e->opcode == TID_OP(WRITE_REQ)) { - req = ack_to_tid_req(e); - hfi1_kern_exp_rcv_clear_all(req); - } - } - spin_unlock(&qp->s_lock); - rvt_rc_error(qp, IB_WC_LOC_LEN_ERR); - goto done; - -done_unlock: + /* Since no payload is delivered, just drop the packet */ spin_unlock(&qp->s_lock); done: return true; @@ -2925,7 +2884,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, if (lnh == HFI1_LRH_GRH) goto r_unlock; - if (tid_rdma_tid_err(rcd, packet, rcv_type, opcode)) + if (tid_rdma_tid_err(packet, rcv_type)) goto r_unlock; } -- cgit v1.2.1 From 948a7287b29e06b8c629f5e70235d857a175ceaf Mon Sep 17 00:00:00 2001 From: Ido Kalir Date: Thu, 15 Aug 2019 11:38:27 +0300 Subject: IB/core: Fix NULL pointer dereference when bind QP to counter If QP is not visible to the pid, then we try to decrease its reference count and return from the function before the QP pointer is initialized. This leads to a NULL pointer dereference. Fix it by passing res directly to rdma_restrack_put() as the argument instead of &qp->res.
This fixes below call trace: [ 5845.110329] BUG: kernel NULL pointer dereference, address: 00000000000000dc [ 5845.120482] Oops: 0002 [#1] SMP PTI [ 5845.129119] RIP: 0010:rdma_restrack_put+0x5/0x30 [ib_core] [ 5845.169450] Call Trace: [ 5845.170544] rdma_counter_get_qp+0x5c/0x70 [ib_core] [ 5845.172074] rdma_counter_bind_qpn_alloc+0x6f/0x1a0 [ib_core] [ 5845.173731] nldev_stat_set_doit+0x314/0x330 [ib_core] [ 5845.175279] rdma_nl_rcv_msg+0xeb/0x1d0 [ib_core] [ 5845.176772] ? __kmalloc_node_track_caller+0x20b/0x2b0 [ 5845.178321] rdma_nl_rcv+0xcb/0x120 [ib_core] [ 5845.179753] netlink_unicast+0x179/0x220 [ 5845.181066] netlink_sendmsg+0x2d8/0x3d0 [ 5845.182338] sock_sendmsg+0x30/0x40 [ 5845.183544] __sys_sendto+0xdc/0x160 [ 5845.184832] ? syscall_trace_enter+0x1f8/0x2e0 [ 5845.186209] ? __audit_syscall_exit+0x1d9/0x280 [ 5845.187584] __x64_sys_sendto+0x24/0x30 [ 5845.188867] do_syscall_64+0x48/0x120 [ 5845.190097] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 1bd8e0a9d0fd1 ("RDMA/counter: Allow manual mode configuration support") Signed-off-by: Ido Kalir Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190815083834.9245-2-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/core/counters.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index b79890739a2c..955d061af06a 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -424,7 +424,7 @@ static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num) return qp; err: - rdma_restrack_put(&qp->res); + rdma_restrack_put(res); return NULL; } -- cgit v1.2.1 From c8b32408b4074232d93e64b6c23b2aa96dde448e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 15 Aug 2019 11:38:28 +0300 Subject: RDMA/counters: Properly implement PID checks "Auto" configuration mode is called for visible in that PID namespace and it ensures that all counters and QPs are 
coexist in the same namespace and belong to the same PID. Fixes: 99fa331dc862 ("RDMA/counter: Add "auto" configuration mode support") Reviewed-by: Mark Zhang Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190815083834.9245-3-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/core/counters.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 955d061af06a..af8c85d18e62 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -149,13 +149,11 @@ static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, struct auto_mode_param *param = &counter->mode.param; bool match = true; - if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) + if (!rdma_is_visible_in_pid_ns(&qp->res)) return false; - /* Ensure that counter belong to right PID */ - if (!rdma_is_kernel_res(&counter->res) && - !rdma_is_kernel_res(&qp->res) && - (task_pid_vnr(counter->res.task) != current->pid)) + /* Ensure that counter belongs to the right PID */ + if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task)) return false; if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE) -- cgit v1.2.1 From 60c78668ae50d6b815ead4a62216822a92097125 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 15 Aug 2019 11:38:29 +0300 Subject: RDMA/restrack: Rewrite PID namespace check to be reliable task_active_pid_ns() is the wrong API to check the PID namespace because it has some restrictions and returns the PID namespace where the process was allocated. It created mismatches with the current namespace, which can be different. Rewrite the whole rdma_is_visible_in_pid_ns() logic to provide reliable results without any relation to the allocated PID namespace.
Fixes: 8be565e65fa9 ("RDMA/nldev: Factor out the PID namespace check") Fixes: 6a6c306a09b5 ("RDMA/restrack: Make is_visible_in_pid_ns() as an API") Reviewed-by: Mark Zhang Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190815083834.9245-4-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/core/nldev.c | 3 +-- drivers/infiniband/core/restrack.c | 15 +++++++-------- include/rdma/restrack.h | 3 +-- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 87d40d1ecdde..020c26976558 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -382,8 +382,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device) for (i = 0; i < RDMA_RESTRACK_MAX; i++) { if (!names[i]) continue; - curr = rdma_restrack_count(device, i, - task_active_pid_ns(current)); + curr = rdma_restrack_count(device, i); ret = fill_res_info_entry(msg, names[i], curr); if (ret) goto err; diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index bddff426ee0f..a07665f7ef8c 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -107,10 +107,8 @@ void rdma_restrack_clean(struct ib_device *dev) * rdma_restrack_count() - the current usage of specific object * @dev: IB device * @type: actual type of object to operate - * @ns: PID namespace */ -int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, - struct pid_namespace *ns) +int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type) { struct rdma_restrack_root *rt = &dev->res[type]; struct rdma_restrack_entry *e; @@ -119,10 +117,9 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, xa_lock(&rt->xa); xas_for_each(&xas, e, U32_MAX) { - if (ns == &init_pid_ns || - (!rdma_is_kernel_res(e) && - ns == task_active_pid_ns(e->task))) - cnt++; + if (!rdma_is_visible_in_pid_ns(e)) + 
continue; + cnt++; } xa_unlock(&rt->xa); return cnt; @@ -360,5 +357,7 @@ bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res) */ if (rdma_is_kernel_res(res)) return task_active_pid_ns(current) == &init_pid_ns; - return task_active_pid_ns(current) == task_active_pid_ns(res->task); + + /* PID 0 means that resource is not found in current namespace */ + return task_pid_vnr(res->task); } diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index b0fc6b26bdf5..83df1ec6664e 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -105,8 +105,7 @@ struct rdma_restrack_entry { }; int rdma_restrack_count(struct ib_device *dev, - enum rdma_restrack_type type, - struct pid_namespace *ns); + enum rdma_restrack_type type); void rdma_restrack_kadd(struct rdma_restrack_entry *res); void rdma_restrack_uadd(struct rdma_restrack_entry *res); -- cgit v1.2.1 From 0e6613b41edd2f55a4b33234c5f31410c1ed3783 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 15 Aug 2019 11:38:31 +0300 Subject: IB/mlx5: Consolidate use_umr checks into single function Introduce helper function to unify various use_umr checks. 
Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190815083834.9245-6-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 14 ++++++++++++++ drivers/infiniband/hw/mlx5/mr.c | 4 +--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f6a53455bf8b..9ae587b74b12 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1475,4 +1475,18 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, bool dyn_bfreg); int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); + +static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, + bool do_modify_atomic) +{ + if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) + return false; + + if (do_modify_atomic && + MLX5_CAP_GEN(dev->mdev, atomic) && + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) + return false; + + return true; +} #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b74fad08412f..8bce65c03b84 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1293,9 +1293,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (err < 0) return ERR_PTR(err); - use_umr = !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled) && - (!MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled) || - !MLX5_CAP_GEN(dev->mdev, atomic)); + use_umr = mlx5_ib_can_use_umr(dev, true); if (order <= mr_cache_max_order(dev) && use_umr) { mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, -- cgit v1.2.1 From 008157528ac5658502c0f87e872778c56c41109c Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 15 Aug 2019 11:38:32 +0300 Subject: IB/mlx5: Report and handle ODP support properly ODP depends on several device capabilities, among them the ability to send UMR WQEs that modify
atomic and entity size of the MR. Therefore, only if all conditions to send such a UMR WQE are met then driver can report that ODP is supported. Use this check of conditions in all places where driver needs to know about ODP support. Also, implicit ODP support depends on ability of driver to send UMR WQEs for an indirect mkey. Therefore, verify that all conditions to do so are met when reporting support. Fixes: c8d75a980fab ("IB/mlx5: Respect new UMR capabilities") Signed-off-by: Moni Shoua Reviewed-by: Guy Levi Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190815083834.9245-7-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 6 +++--- drivers/infiniband/hw/mlx5/odp.c | 17 +++++++++-------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e12a4404096b..0569bcab02d4 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1023,7 +1023,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL; if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - if (MLX5_CAP_GEN(mdev, pg)) + if (dev->odp_caps.general_caps & IB_ODP_SUPPORT) props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; props->odp_caps = dev->odp_caps; } @@ -6139,6 +6139,8 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->port[i].roce.last_port_state = IB_PORT_DOWN; } + mlx5_ib_internal_fill_odp_caps(dev); + err = mlx5_ib_init_multiport_master(dev); if (err) return err; @@ -6563,8 +6565,6 @@ static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev) static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) { - mlx5_ib_internal_fill_odp_caps(dev); - return mlx5_ib_odp_init_one(dev); } diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 1d257d1b3b0d..0a59912a4cef 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ 
b/drivers/infiniband/hw/mlx5/odp.c @@ -301,7 +301,8 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) memset(caps, 0, sizeof(*caps)); - if (!MLX5_CAP_GEN(dev->mdev, pg)) + if (!MLX5_CAP_GEN(dev->mdev, pg) || + !mlx5_ib_can_use_umr(dev, true)) return; caps->general_caps = IB_ODP_SUPPORT; @@ -355,7 +356,8 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) && MLX5_CAP_GEN(dev->mdev, null_mkey) && - MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) + MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) && + !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled)) caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT; return; @@ -1622,8 +1624,10 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) { int ret = 0; - if (dev->odp_caps.general_caps & IB_ODP_SUPPORT) - ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops); + if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT)) + return ret; + + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops); if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) { ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey); @@ -1633,9 +1637,6 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) } } - if (!MLX5_CAP_GEN(dev->mdev, pg)) - return ret; - ret = mlx5_ib_create_pf_eq(dev, &dev->odp_pf_eq); return ret; @@ -1643,7 +1644,7 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev) { - if (!MLX5_CAP_GEN(dev->mdev, pg)) + if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT)) return; mlx5_ib_destroy_pf_eq(dev, &dev->odp_pf_eq); -- cgit v1.2.1 From 25a4517214ffa217a443181f7f885b914e6b328f Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 15 Aug 2019 11:38:33 +0300 Subject: IB/mlx5: Fix MR re-registration flow to use UMR properly The UMR WQE in the MR re-registration flow requires that modify_atomic and modify_entity_size capabilities are enabled. 
Therefore, check that these capabilities are present before going to the UMR flow, and go through the slow path if not. Fixes: c8d75a980fab ("IB/mlx5: Respect new UMR capabilities") Signed-off-by: Moni Shoua Reviewed-by: Guy Levi Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190815083834.9245-8-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 8bce65c03b84..3401f5f6792e 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1446,7 +1446,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, goto err; } - if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) { + if (!mlx5_ib_can_use_umr(dev, true) || + (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len))) { /* * UMR can't be used - MKey needs to be replaced. */ -- cgit v1.2.1 From 841b07f99a4766d66f50d8a2ab941bce94cd4e70 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 15 Aug 2019 11:38:34 +0300 Subject: IB/mlx5: Block MR WR if UMR is not possible Check conditions that are mandatory to post_send UMR WQEs. 1. Modifying page size. 2. Modifying remote atomic permissions if atomic access is required. If either condition is not fulfilled then fail the post_send() flow.
Fixes: c8d75a980fab ("IB/mlx5: Respect new UMR capabilities") Signed-off-by: Moni Shoua Reviewed-by: Guy Levi Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190815083834.9245-9-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 379328b2598f..72869ff4a334 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4162,7 +4162,7 @@ static u64 get_xlt_octo(u64 bytes) MLX5_IB_UMR_OCTOWORD; } -static __be64 frwr_mkey_mask(void) +static __be64 frwr_mkey_mask(bool atomic) { u64 result; @@ -4175,10 +4175,12 @@ static __be64 frwr_mkey_mask(void) MLX5_MKEY_MASK_LW | MLX5_MKEY_MASK_RR | MLX5_MKEY_MASK_RW | - MLX5_MKEY_MASK_A | MLX5_MKEY_MASK_SMALL_FENCE | MLX5_MKEY_MASK_FREE; + if (atomic) + result |= MLX5_MKEY_MASK_A; + return cpu_to_be64(result); } @@ -4204,7 +4206,7 @@ static __be64 sig_mkey_mask(void) } static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, - struct mlx5_ib_mr *mr, u8 flags) + struct mlx5_ib_mr *mr, u8 flags, bool atomic) { int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; @@ -4212,7 +4214,7 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, umr->flags = flags; umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); - umr->mkey_mask = frwr_mkey_mask(); + umr->mkey_mask = frwr_mkey_mask(atomic); } static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr) @@ -4811,10 +4813,22 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, { struct mlx5_ib_mr *mr = to_mmr(wr->mr); struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); + struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device); int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; + bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC; u8 flags = 0; + if 
(!mlx5_ib_can_use_umr(dev, atomic)) { + mlx5_ib_warn(to_mdev(qp->ibqp.device), + "Fast update of %s for MR is disabled\n", + (MLX5_CAP_GEN(dev->mdev, + umr_modify_entity_size_disabled)) ? + "entity size" : + "atomic access"); + return -EINVAL; + } + if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { mlx5_ib_warn(to_mdev(qp->ibqp.device), "Invalid IB_SEND_INLINE send flag\n"); @@ -4826,7 +4840,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, if (umr_inline) flags |= MLX5_UMR_INLINE; - set_reg_umr_seg(*seg, mr, flags); + set_reg_umr_seg(*seg, mr, flags, atomic); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); -- cgit v1.2.1 From a7bfb93f0211b4a2f1ffeeb259ed6206bac30460 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Mon, 19 Aug 2019 12:27:39 +0800 Subject: RDMA/cma: fix null-ptr-deref Read in cma_cleanup In cma_init(), if cma_configfs_init() fails, we need to release the previously allocated resources and return the failure; otherwise a null-ptr-deref read will be triggered in cma_cleanup().
cma_cleanup cma_configfs_exit configfs_unregister_subsystem Fixes: 045959db65c6 ("IB/cma: Add configfs for rdma_cm") Reported-by: Hulk Robot Signed-off-by: zhengbin Reviewed-by: Parav Pandit Link: https://lore.kernel.org/r/1566188859-103051-1-git-send-email-zhengbin13@huawei.com Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 19f1730a4f24..a68d0ccf67a4 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4724,10 +4724,14 @@ static int __init cma_init(void) if (ret) goto err; - cma_configfs_init(); + ret = cma_configfs_init(); + if (ret) + goto err_ib; return 0; +err_ib: + ib_unregister_client(&cma_client); err: unregister_netdevice_notifier(&cma_nb); ib_sa_unregister_client(&sa_client); -- cgit v1.2.1 From 5c1baaa82cea2c815a5180ded402a7cd455d1810 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Sun, 18 Aug 2019 15:23:01 -0500 Subject: IB/mlx4: Fix memory leaks In mlx4_ib_alloc_pv_bufs(), 'tun_qp->tx_ring' is allocated through kcalloc(). However, it is not always deallocated in the following execution if an error occurs, leading to memory leaks. To fix this issue, free 'tun_qp->tx_ring' whenever an error occurs. 
Signed-off-by: Wenwen Wang Acked-by: Leon Romanovsky Link: https://lore.kernel.org/r/1566159781-4642-1-git-send-email-wenwen@cs.uga.edu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mad.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 68c951491a08..57079110af9b 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1677,8 +1677,6 @@ tx_err: tx_buf_size, DMA_TO_DEVICE); kfree(tun_qp->tx_ring[i].buf.addr); } - kfree(tun_qp->tx_ring); - tun_qp->tx_ring = NULL; i = MLX4_NUM_TUNNEL_BUFS; err: while (i > 0) { @@ -1687,6 +1685,8 @@ err: rx_buf_size, DMA_FROM_DEVICE); kfree(tun_qp->ring[i].addr); } + kfree(tun_qp->tx_ring); + tun_qp->tx_ring = NULL; kfree(tun_qp->ring); tun_qp->ring = NULL; return -ENOMEM; -- cgit v1.2.1 From b08afa064c320e5d85cdc27228426b696c4c8dae Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Sun, 18 Aug 2019 14:29:31 -0500 Subject: infiniband: hfi1: fix a memory leak bug In fault_opcodes_read(), 'data' is not deallocated if debugfs_file_get() fails, leading to a memory leak. To fix this bug, introduce the 'free_data' label to free 'data' before returning the error. 
Signed-off-by: Wenwen Wang Reviewed-by: Leon Romanovsky Acked-by: Dennis Dalessandro Link: https://lore.kernel.org/r/1566156571-4335-1-git-send-email-wenwen@cs.uga.edu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/fault.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c index 93613e5def9b..814324d17295 100644 --- a/drivers/infiniband/hw/hfi1/fault.c +++ b/drivers/infiniband/hw/hfi1/fault.c @@ -214,7 +214,7 @@ static ssize_t fault_opcodes_read(struct file *file, char __user *buf, return -ENOMEM; ret = debugfs_file_get(file->f_path.dentry); if (unlikely(ret)) - return ret; + goto free_data; bit = find_first_bit(fault->opcodes, bitsize); while (bit < bitsize) { zero = find_next_zero_bit(fault->opcodes, bitsize, bit); @@ -232,6 +232,7 @@ static ssize_t fault_opcodes_read(struct file *file, char __user *buf, data[size - 1] = '\n'; data[size] = '\0'; ret = simple_read_from_buffer(buf, len, pos, data, size); +free_data: kfree(data); return ret; } -- cgit v1.2.1 From 2323d7baab2b18d87d9bc267452e387aa9f0060a Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Sun, 18 Aug 2019 13:54:46 -0500 Subject: infiniband: hfi1: fix memory leaks In fault_opcodes_write(), 'data' is allocated through kcalloc(). However, it is not deallocated in the following execution if an error occurs, leading to memory leaks. To fix this issue, introduce the 'free_data' label to free 'data' before returning the error. 
Signed-off-by: Wenwen Wang Reviewed-by: Leon Romanovsky Acked-by: Dennis Dalessandro Link: https://lore.kernel.org/r/1566154486-3713-1-git-send-email-wenwen@cs.uga.edu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/fault.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c index 814324d17295..986c12153e62 100644 --- a/drivers/infiniband/hw/hfi1/fault.c +++ b/drivers/infiniband/hw/hfi1/fault.c @@ -141,12 +141,14 @@ static ssize_t fault_opcodes_write(struct file *file, const char __user *buf, if (!data) return -ENOMEM; copy = min(len, datalen - 1); - if (copy_from_user(data, buf, copy)) - return -EFAULT; + if (copy_from_user(data, buf, copy)) { + ret = -EFAULT; + goto free_data; + } ret = debugfs_file_get(file->f_path.dentry); if (unlikely(ret)) - return ret; + goto free_data; ptr = data; token = ptr; for (ptr = data; *ptr; ptr = end + 1, token = ptr) { @@ -195,6 +197,7 @@ static ssize_t fault_opcodes_write(struct file *file, const char __user *buf, ret = len; debugfs_file_put(file->f_path.dentry); +free_data: kfree(data); return ret; } -- cgit v1.2.1 From 4651d1802f7063e4d8c0bcad957f46ece0c04024 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 19 Aug 2019 14:36:01 -0400 Subject: net/smc: make sure EPOLLOUT is raised Currently, we are only explicitly setting SOCK_NOSPACE on a write timeout for non-blocking sockets. Epoll() edge-trigger mode relies on SOCK_NOSPACE being set when -EAGAIN is returned to ensure that EPOLLOUT is raised. Expand the setting of SOCK_NOSPACE to non-blocking sockets as well that can use SO_SNDTIMEO to adjust their write timeout. This mirrors the behavior that Eric Dumazet introduced for tcp sockets. Signed-off-by: Jason Baron Cc: Eric Dumazet Cc: Ursula Braun Cc: Karsten Graul Signed-off-by: David S. 
Miller --- net/smc/smc_tx.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index f0de323d15d6..6c8f09c1ce51 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -76,13 +76,11 @@ static int smc_tx_wait(struct smc_sock *smc, int flags) DEFINE_WAIT_FUNC(wait, woken_wake_function); struct smc_connection *conn = &smc->conn; struct sock *sk = &smc->sk; - bool noblock; long timeo; int rc = 0; /* similar to sk_stream_wait_memory */ timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); - noblock = timeo ? false : true; add_wait_queue(sk_sleep(sk), &wait); while (1) { sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); @@ -97,8 +95,8 @@ static int smc_tx_wait(struct smc_sock *smc, int flags) break; } if (!timeo) { - if (noblock) - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + /* ensure EPOLLOUT is subsequently generated */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); rc = -EAGAIN; break; } -- cgit v1.2.1 From 96a1b033ac24ccc58156f05c183b2cba0b9412d5 Mon Sep 17 00:00:00 2001 From: "Terry S. Duncan" Date: Mon, 19 Aug 2019 17:24:02 -0700 Subject: net/ncsi: Ensure 32-bit boundary for data cksum The NCSI spec indicates that if the data does not end on a 32 bit boundary, one to three padding bytes equal to 0x00 shall be present to align the checksum field to a 32-bit boundary. Signed-off-by: Terry S. Duncan Signed-off-by: David S. 
Miller --- net/ncsi/ncsi-cmd.c | 2 +- net/ncsi/ncsi-rsp.c | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c index 5c3fad8cba57..eab4346b0a39 100644 --- a/net/ncsi/ncsi-cmd.c +++ b/net/ncsi/ncsi-cmd.c @@ -54,7 +54,7 @@ static void ncsi_cmd_build_header(struct ncsi_pkt_hdr *h, checksum = ncsi_calculate_checksum((unsigned char *)h, sizeof(*h) + nca->payload); pchecksum = (__be32 *)((void *)h + sizeof(struct ncsi_pkt_hdr) + - nca->payload); + ALIGN(nca->payload, 4)); *pchecksum = htonl(checksum); } diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 7581bf919885..d876bd55f356 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -47,7 +47,8 @@ static int ncsi_validate_rsp_pkt(struct ncsi_request *nr, if (ntohs(h->code) != NCSI_PKT_RSP_C_COMPLETED || ntohs(h->reason) != NCSI_PKT_RSP_R_NO_ERROR) { netdev_dbg(nr->ndp->ndev.dev, - "NCSI: non zero response/reason code\n"); + "NCSI: non zero response/reason code %04xh, %04xh\n", + ntohs(h->code), ntohs(h->reason)); return -EPERM; } @@ -55,7 +56,7 @@ static int ncsi_validate_rsp_pkt(struct ncsi_request *nr, * sender doesn't support checksum according to NCSI * specification. 
*/ - pchecksum = (__be32 *)((void *)(h + 1) + payload - 4); + pchecksum = (__be32 *)((void *)(h + 1) + ALIGN(payload, 4) - 4); if (ntohl(*pchecksum) == 0) return 0; @@ -63,7 +64,9 @@ static int ncsi_validate_rsp_pkt(struct ncsi_request *nr, sizeof(*h) + payload - 4); if (*pchecksum != htonl(checksum)) { - netdev_dbg(nr->ndp->ndev.dev, "NCSI: checksum mismatched\n"); + netdev_dbg(nr->ndp->ndev.dev, + "NCSI: checksum mismatched; recd: %08x calc: %08x\n", + *pchecksum, htonl(checksum)); return -EINVAL; } -- cgit v1.2.1 From a1c4cd67840ef80f6ca5f73326fa9a6719303a95 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 20 Aug 2019 13:52:47 +0800 Subject: net: fix __ip_mc_inc_group usage in ip_mc_inc_group, memory allocation flag, not mcast mode, is expected by __ip_mc_inc_group similar issue in __ip_mc_join_group, both mcast mode and gfp_t are needed here, so use ____ip_mc_inc_group(...) Fixes: 9fb20801dab4 ("net: Fix ip_mc_{dec,inc}_group allocation context") Signed-off-by: Li RongQing Signed-off-by: Florian Fainelli Signed-off-by: Zhang Yu Signed-off-by: David S.
Miller --- net/ipv4/igmp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 180f6896b98b..480d0b22db1a 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1475,7 +1475,7 @@ EXPORT_SYMBOL(__ip_mc_inc_group); void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) { - __ip_mc_inc_group(in_dev, addr, MCAST_EXCLUDE); + __ip_mc_inc_group(in_dev, addr, GFP_KERNEL); } EXPORT_SYMBOL(ip_mc_inc_group); @@ -2197,7 +2197,7 @@ static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr, iml->sflist = NULL; iml->sfmode = mode; rcu_assign_pointer(inet->mc_list, iml); - __ip_mc_inc_group(in_dev, addr, mode); + ____ip_mc_inc_group(in_dev, addr, mode, GFP_KERNEL); err = 0; done: return err; -- cgit v1.2.1 From 90ae409f9eb3bcaf38688f9ec22375816053a08e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Aug 2019 11:45:49 +0900 Subject: dma-direct: fix zone selection after an unaddressable CMA allocation The new dma_alloc_contiguous hides if we allocate CMA or regular pages, and thus fails to retry a ZONE_NORMAL allocation if the CMA allocation succeeds but isn't addressable. That means we either fail outright or dip into a small zone that might not succeed either. Thanks to Hillf Danton for debugging this issue. 
Fixes: b1d2dc009dec ("dma-contiguous: add dma_{alloc,free}_contiguous() helpers") Reported-by: Tobias Klausmann Signed-off-by: Christoph Hellwig Tested-by: Tobias Klausmann --- drivers/iommu/dma-iommu.c | 3 +++ include/linux/dma-contiguous.h | 5 +---- kernel/dma/contiguous.c | 8 ++------ kernel/dma/direct.c | 10 +++++++++- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index d991d40f797f..f68a62c3c32b 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -965,10 +965,13 @@ static void *iommu_dma_alloc_pages(struct device *dev, size_t size, { bool coherent = dev_is_dma_coherent(dev); size_t alloc_size = PAGE_ALIGN(size); + int node = dev_to_node(dev); struct page *page = NULL; void *cpu_addr; page = dma_alloc_contiguous(dev, alloc_size, gfp); + if (!page) + page = alloc_pages_node(node, gfp, get_order(alloc_size)); if (!page) return NULL; diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index c05d4e661489..03f8e98e3bcc 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -160,10 +160,7 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages, static inline struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) { - int node = dev ? dev_to_node(dev) : NUMA_NO_NODE; - size_t align = get_order(PAGE_ALIGN(size)); - - return alloc_pages_node(node, gfp, align); + return NULL; } static inline void dma_free_contiguous(struct device *dev, struct page *page, diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 2bd410f934b3..69cfb4345388 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -230,9 +230,7 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages, */ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) { - int node = dev ? 
dev_to_node(dev) : NUMA_NO_NODE; - size_t count = PAGE_ALIGN(size) >> PAGE_SHIFT; - size_t align = get_order(PAGE_ALIGN(size)); + size_t count = size >> PAGE_SHIFT; struct page *page = NULL; struct cma *cma = NULL; @@ -243,14 +241,12 @@ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) /* CMA can be used only in the context which permits sleeping */ if (cma && gfpflags_allow_blocking(gfp)) { + size_t align = get_order(size); size_t cma_align = min_t(size_t, align, CONFIG_CMA_ALIGNMENT); page = cma_alloc(cma, count, cma_align, gfp & __GFP_NOWARN); } - /* Fallback allocation of normal pages */ - if (!page) - page = alloc_pages_node(node, gfp, align); return page; } diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 795c9b095d75..706113c6bebc 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -85,6 +85,8 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { + size_t alloc_size = PAGE_ALIGN(size); + int node = dev_to_node(dev); struct page *page = NULL; u64 phys_mask; @@ -95,8 +97,14 @@ struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp &= ~__GFP_ZERO; gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, &phys_mask); + page = dma_alloc_contiguous(dev, alloc_size, gfp); + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + dma_free_contiguous(dev, page, alloc_size); + page = NULL; + } again: - page = dma_alloc_contiguous(dev, size, gfp); + if (!page) + page = alloc_pages_node(node, gfp, get_order(alloc_size)); if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { dma_free_contiguous(dev, page, size); page = NULL; -- cgit v1.2.1 From 377ec83643efcae869528b4b26a5070fdeba3abd Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Sun, 11 Aug 2019 19:18:03 -0500 Subject: dmaengine: fsldma: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. Fix the following warnings (Building: powerpc-ppa8548_defconfig powerpc): drivers/dma/fsldma.c: In function ‘fsl_dma_chan_probe’: drivers/dma/fsldma.c:1165:26: warning: this statement may fall through [-Wimplicit-fallthrough=] chan->toggle_ext_pause = fsl_chan_toggle_ext_pause; ~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/dma/fsldma.c:1166:2: note: here case FSL_DMA_IP_83XX: ^~~~ Reported-by: kbuild test robot Acked-by: Li Yang Signed-off-by: Gustavo A. R. Silva --- drivers/dma/fsldma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 23e0a356f167..ad72b3f42ffa 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -1163,6 +1163,7 @@ static int fsl_dma_chan_probe(struct fsldma_device *fdev, switch (chan->feature & FSL_DMA_IP_MASK) { case FSL_DMA_IP_85XX: chan->toggle_ext_pause = fsl_chan_toggle_ext_pause; + /* Fall through */ case FSL_DMA_IP_83XX: chan->toggle_ext_start = fsl_chan_toggle_ext_start; chan->set_src_loop_size = fsl_chan_set_src_loop_size; -- cgit v1.2.1 From 06264adfa2bcc8abb556dec9af0e86150a67faf0 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 20 Aug 2019 19:29:16 -0500 Subject: ARM: riscpc: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. 
Fix the following warning (Building: rpc_defconfig arm): arch/arm/mach-rpc/riscpc.c: In function ‘parse_tag_acorn’: arch/arm/mach-rpc/riscpc.c:48:13: warning: this statement may fall through [-Wimplicit-fallthrough=] vram_size += PAGE_SIZE * 256; ~~~~~~~~~~^~~~~~~~~~~~~~~~~~ arch/arm/mach-rpc/riscpc.c:49:2: note: here case 256: ^~~~ Signed-off-by: Gustavo A. R. Silva --- arch/arm/mach-rpc/riscpc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-rpc/riscpc.c b/arch/arm/mach-rpc/riscpc.c index 0ce56ad754ce..ea2c84214bac 100644 --- a/arch/arm/mach-rpc/riscpc.c +++ b/arch/arm/mach-rpc/riscpc.c @@ -46,6 +46,7 @@ static int __init parse_tag_acorn(const struct tag *tag) switch (tag->u.acorn.vram_pages) { case 512: vram_size += PAGE_SIZE * 256; + /* Fall through - ??? */ case 256: vram_size += PAGE_SIZE * 256; default: -- cgit v1.2.1 From edf6a05976980b5c21f19a60fde175f736e4ab61 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 20 Aug 2019 13:01:03 -0500 Subject: drm/sun4i: sun6i_mipi_dsi: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. Fix the following warning (Building: multi_v7_defconfig arm): drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c: In function ‘sun6i_dsi_transfer’: drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c:993:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (msg->rx_len == 1) { ^ drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c:998:2: note: here default: ^~~~~~~ Signed-off-by: Gustavo A. R. 
Silva --- drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c index a1fc8b520985..b889ad3e86e1 100644 --- a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c +++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c @@ -993,6 +993,7 @@ static ssize_t sun6i_dsi_transfer(struct mipi_dsi_host *host, ret = sun6i_dsi_dcs_read(dsi, msg); break; } + /* Else, fall through */ default: ret = -EINVAL; -- cgit v1.2.1 From 5334653d4ff29f5e1f216a2f5d3a86c19a479b1a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 20 Aug 2019 12:47:06 -0500 Subject: drm/sun4i: tcon: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. Fix the following warning (Building: sunxi_defconfig arm): drivers/gpu/drm/sun4i/sun4i_tcon.c: In function ‘sun4i_tcon0_mode_set_dithering’: drivers/gpu/drm/sun4i/sun4i_tcon.c:318:7: warning: this statement may fall through [-Wimplicit-fallthrough=] val |= SUN4I_TCON0_FRM_CTL_MODE_B; drivers/gpu/drm/sun4i/sun4i_tcon.c:319:2: note: here case MEDIA_BUS_FMT_RGB666_1X18: ^~~~ Signed-off-by: Gustavo A. R. Silva --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 64c43ee6bd92..df0cc8f46d7b 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -314,6 +314,7 @@ static void sun4i_tcon0_mode_set_dithering(struct sun4i_tcon *tcon, /* R and B components are only 5 bits deep */ val |= SUN4I_TCON0_FRM_CTL_MODE_R; val |= SUN4I_TCON0_FRM_CTL_MODE_B; + /* Fall through */ case MEDIA_BUS_FMT_RGB666_1X18: case MEDIA_BUS_FMT_RGB666_1X7X3_SPWG: /* Fall through: enable dithering */ -- cgit v1.2.1 From 3f0289cb9e0ee38e0075328e59b9cd88bf5ea474 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Tue, 20 Aug 2019 12:54:32 -0500 Subject: mtd: sa1100: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. Fix the following warning (Building: assabet_defconfig arm): drivers/mtd/maps/sa1100-flash.c: In function ‘sa1100_probe_subdev’: drivers/mtd/maps/sa1100-flash.c:82:3: warning: this statement may fall through [-Wimplicit-fallthrough=] printk(KERN_WARNING "SA1100 flash: unknown base address " ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "0x%08lx, assuming CS0\n", phys); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/mtd/maps/sa1100-flash.c:85:2: note: here case SA1100_CS0_PHYS: ^~~~ Signed-off-by: Gustavo A. R. Silva --- drivers/mtd/maps/sa1100-flash.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/maps/sa1100-flash.c b/drivers/mtd/maps/sa1100-flash.c index 895510d40ce4..47602af4ee34 100644 --- a/drivers/mtd/maps/sa1100-flash.c +++ b/drivers/mtd/maps/sa1100-flash.c @@ -81,6 +81,7 @@ static int sa1100_probe_subdev(struct sa_subdev_info *subdev, struct resource *r default: printk(KERN_WARNING "SA1100 flash: unknown base address " "0x%08lx, assuming CS0\n", phys); + /* Fall through */ case SA1100_CS0_PHYS: subdev->map.bankwidth = (MSC0 & MSC_RBW) ? 2 : 4; -- cgit v1.2.1 From c9cbbb9f04f3ee27970f08d3aa6e6742a43d4ca5 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 20 Aug 2019 13:07:46 -0500 Subject: watchdog: wdt285: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. 
Fix the following warning (Building: footbridge_defconfig arm): drivers/watchdog/wdt285.c: In function ‘watchdog_ioctl’: drivers/watchdog/wdt285.c:170:3: warning: this statement may fall through [-Wimplicit-fallthrough=] watchdog_ping(); ^~~~~~~~~~~~~~~ drivers/watchdog/wdt285.c:172:2: note: here case WDIOC_GETTIMEOUT: ^~~~ Signed-off-by: Gustavo A. R. Silva --- drivers/watchdog/wdt285.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/wdt285.c b/drivers/watchdog/wdt285.c index 4eacfb1ce1ac..eb729d704836 100644 --- a/drivers/watchdog/wdt285.c +++ b/drivers/watchdog/wdt285.c @@ -168,7 +168,7 @@ static long watchdog_ioctl(struct file *file, unsigned int cmd, soft_margin = new_margin; reload = soft_margin * (mem_fclk_21285 / 256); watchdog_ping(); - /* Fall */ + /* Fall through */ case WDIOC_GETTIMEOUT: ret = put_user(soft_margin, int_arg); break; -- cgit v1.2.1 From 5274fdba8e3c04e9ac1ba457379afc8835f9aa0f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 20 Aug 2019 15:55:26 -0500 Subject: power: supply: ab8500_charger: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. Fix the following warning (Building: allmodconfig arm): drivers/power/supply/ab8500_charger.c: In function ‘ab8500_charger_max_usb_curr’: drivers/power/supply/ab8500_charger.c:738:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (di->vbus_detected) { ^ drivers/power/supply/ab8500_charger.c:745:2: note: here case USB_STAT_HM_IDGND: ^~~~ Signed-off-by: Gustavo A. R. 
Silva --- drivers/power/supply/ab8500_charger.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/supply/ab8500_charger.c b/drivers/power/supply/ab8500_charger.c index 30de448de802..86d88aec94a1 100644 --- a/drivers/power/supply/ab8500_charger.c +++ b/drivers/power/supply/ab8500_charger.c @@ -742,6 +742,7 @@ static int ab8500_charger_max_usb_curr(struct ab8500_charger *di, USB_CH_IP_CUR_LVL_1P5; break; } + /* Else, fall through */ case USB_STAT_HM_IDGND: dev_err(di->dev, "USB Type - Charging not allowed\n"); di->max_usb_in_curr.usb_type_max = USB_CH_IP_CUR_LVL_0P05; -- cgit v1.2.1 From 93cbcf5d22bbe6f2ecf64765d5f6085beceb3ee8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 20 Aug 2019 16:03:09 -0500 Subject: MIPS: Octeon: Mark expected switch fall-through Mark switch cases where we are expecting to fall through. Fix the following warning (Building: cavium_octeon_defconfig mips): arch/mips/include/asm/octeon/cvmx-sli-defs.h:47:6: warning: this statement may fall through [-Wimplicit-fallthrough=] Signed-off-by: Gustavo A. R. Silva --- arch/mips/include/asm/octeon/cvmx-sli-defs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/include/asm/octeon/cvmx-sli-defs.h b/arch/mips/include/asm/octeon/cvmx-sli-defs.h index 52cf96ea43e5..cbc7cdae1c6a 100644 --- a/arch/mips/include/asm/octeon/cvmx-sli-defs.h +++ b/arch/mips/include/asm/octeon/cvmx-sli-defs.h @@ -46,6 +46,7 @@ static inline uint64_t CVMX_SLI_PCIE_MSI_RCV_FUNC(void) case OCTEON_CN78XX & OCTEON_FAMILY_MASK: if (OCTEON_IS_MODEL(OCTEON_CN78XX_PASS1_X)) return 0x0000000000003CB0ull; + /* Else, fall through */ default: return 0x0000000000023CB0ull; } -- cgit v1.2.1 From da1fb2909e701ffbae8c5d6111f475603355e6e2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Tue, 20 Aug 2019 16:20:05 -0500 Subject: scsi: libsas: sas_discover: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. Fix the following warning (Building: mtx1_defconfig mips): drivers/scsi/libsas/sas_discover.c: In function ‘sas_discover_domain’: ./include/linux/printk.h:309:2: warning: this statement may fall through [-Wimplicit-fallthrough=] printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/scsi/libsas/sas_discover.c:459:3: note: in expansion of macro ‘pr_notice’ pr_notice("ATA device seen but CONFIG_SCSI_SAS_ATA=N so cannot attach\n"); ^~~~~~~~~ drivers/scsi/libsas/sas_discover.c:462:2: note: here default: ^~~~~~~ Signed-off-by: Gustavo A. R. Silva --- drivers/scsi/libsas/sas_discover.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index abcad097ff2f..f47b4b281b14 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -459,6 +459,7 @@ static void sas_discover_domain(struct work_struct *work) pr_notice("ATA device seen but CONFIG_SCSI_SAS_ATA=N so cannot attach\n"); /* Fall through */ #endif + /* Fall through - only for the #else condition above. */ default: error = -ENXIO; pr_err("unhandled device %d\n", dev->dev_type); -- cgit v1.2.1 From c3cb6674df4c4a70f949e412dfe2230483092523 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 20 Aug 2019 19:07:46 -0500 Subject: video: fbdev: acornfb: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. 
Fix the following warning (Building: rpc_defconfig arm): drivers/video/fbdev/acornfb.c: In function ‘acornfb_parse_dram’: drivers/video/fbdev/acornfb.c:860:9: warning: this statement may fall through [-Wimplicit-fallthrough=] size *= 1024; ~~~~~^~~~~~~ drivers/video/fbdev/acornfb.c:861:3: note: here case 'K': ^~~~ Signed-off-by: Gustavo A. R. Silva --- drivers/video/fbdev/acornfb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/video/fbdev/acornfb.c b/drivers/video/fbdev/acornfb.c index 92f23e3bc27a..7cacae5a8797 100644 --- a/drivers/video/fbdev/acornfb.c +++ b/drivers/video/fbdev/acornfb.c @@ -858,6 +858,7 @@ static void acornfb_parse_dram(char *opt) case 'M': case 'm': size *= 1024; + /* Fall through */ case 'K': case 'k': size *= 1024; -- cgit v1.2.1 From 6de3c9e3f6b3eaf66859e1379b3f35dda781416b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 15 Aug 2019 11:41:06 +0200 Subject: ALSA: usb-audio: Fix invalid NULL check in snd_emuusb_set_samplerate() The quirk function snd_emuusb_set_samplerate() has a NULL check for the mixer element, but this is useless in the current code. It used to be a check against mixer->id_elems[unitid] but it was changed later to the value after mixer_elem_list_to_info() which is always non-NULL due to the container_of() usage. This patch fixes the check before the conversion. While we're at it, correct a typo in the comment in the function, too.
Fixes: 8c558076c740 ("ALSA: usb-audio: Clean up mixer element list traverse") Cc: Signed-off-by: Takashi Iwai --- sound/usb/mixer_quirks.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 199fa157a411..27dcb3743690 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -1155,17 +1155,17 @@ void snd_emuusb_set_samplerate(struct snd_usb_audio *chip, { struct usb_mixer_interface *mixer; struct usb_mixer_elem_info *cval; - int unitid = 12; /* SamleRate ExtensionUnit ID */ + int unitid = 12; /* SampleRate ExtensionUnit ID */ list_for_each_entry(mixer, &chip->mixer_list, list) { - cval = mixer_elem_list_to_info(mixer->id_elems[unitid]); - if (cval) { + if (mixer->id_elems[unitid]) { + cval = mixer_elem_list_to_info(mixer->id_elems[unitid]); snd_usb_mixer_set_ctl_value(cval, UAC_SET_CUR, cval->control << 8, samplerate_id); snd_usb_mixer_notify_id(mixer, unitid); + break; } - break; } } -- cgit v1.2.1 From 5fd2f91ad483baffdbe798f8a08f1b41442d1e24 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 1 Aug 2019 09:30:33 +0200 Subject: mac80211: fix possible sta leak If TDLS station addition is rejected, the sta memory is leaked. Avoid this by moving the check before the allocation. 
Cc: stable@vger.kernel.org Fixes: 7ed5285396c2 ("mac80211: don't initiate TDLS connection if station is not associated to AP") Link: https://lore.kernel.org/r/20190801073033.7892-1-johannes@sipsolutions.net Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4d458067d80d..111c400199ec 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1546,6 +1546,11 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (is_multicast_ether_addr(mac)) return -EINVAL; + if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER) && + sdata->vif.type == NL80211_IFTYPE_STATION && + !sdata->u.mgd.associated) + return -EINVAL; + sta = sta_info_alloc(sdata, mac, GFP_KERNEL); if (!sta) return -ENOMEM; @@ -1553,10 +1558,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) sta->sta.tdls = true; - if (sta->sta.tdls && sdata->vif.type == NL80211_IFTYPE_STATION && - !sdata->u.mgd.associated) - return -EINVAL; - err = sta_apply_parameters(local, sta, params); if (err) { sta_info_free(local, sta); -- cgit v1.2.1 From b67fd72e84a88cae64cea8ab47ccdaab3bb3094d Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Mon, 5 Aug 2019 14:34:00 +0200 Subject: cfg80211: Fix Extended Key ID key install checks Fix two shortcomings in the Extended Key ID API: 1) Allow the userspace to install pairwise keys using keyid 1 without NL80211_KEY_NO_TX set. This allows the userspace to install and activate pairwise keys with keyid 1 in the same way as for keyid 0, simplifying the API usage for e.g. FILS and FT key installs. 2) IEEE 802.11 - 2016 restricts Extended Key ID usage to CCMP/GCMP ciphers in IEEE 802.11 - 2016 "9.4.2.25.4 RSN capabilities". Enforce that when installing a key. 
Cc: stable@vger.kernel.org # 5.2 Fixes: 6cdd3979a2bd ("nl80211/cfg80211: Extended Key ID support") Signed-off-by: Alexander Wetzel Link: https://lore.kernel.org/r/20190805123400.51567-1-alexander@wetzel-home.de Signed-off-by: Johannes Berg --- net/wireless/util.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index d0e35b7b9e35..e74837824cea 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -233,25 +233,30 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, switch (params->cipher) { case WLAN_CIPHER_SUITE_TKIP: + /* Extended Key ID can only be used with CCMP/GCMP ciphers */ + if ((pairwise && key_idx) || + params->mode != NL80211_KEY_RX_TX) + return -EINVAL; + break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: - /* IEEE802.11-2016 allows only 0 and - when using Extended Key - * ID - 1 as index for pairwise keys. + /* IEEE802.11-2016 allows only 0 and - when supporting + * Extended Key ID - 1 as index for pairwise keys. * @NL80211_KEY_NO_TX is only allowed for pairwise keys when * the driver supports Extended Key ID. * @NL80211_KEY_SET_TX can't be set when installing and * validating a key. 
*/ - if (params->mode == NL80211_KEY_NO_TX) { - if (!wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_EXT_KEY_ID)) - return -EINVAL; - else if (!pairwise || key_idx < 0 || key_idx > 1) + if ((params->mode == NL80211_KEY_NO_TX && !pairwise) || + params->mode == NL80211_KEY_SET_TX) + return -EINVAL; + if (wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_EXT_KEY_ID)) { + if (pairwise && (key_idx < 0 || key_idx > 1)) return -EINVAL; - } else if ((pairwise && key_idx) || - params->mode == NL80211_KEY_SET_TX) { + } else if (pairwise && key_idx) { return -EINVAL; } break; -- cgit v1.2.1 From 54577e5018a8c0cb79c9a0fa118a55c68715d398 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 20 Aug 2019 17:35:52 +0200 Subject: selftests: kvm: fix state save/load on processors without XSAVE state_test and smm_test are failing on older processors that do not have xcr0. This is because on those processor KVM does provide support for KVM_GET/SET_XSAVE (to avoid having to rely on the older KVM_GET/SET_FPU) but not for KVM_GET/SET_XCRS. 
Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/x86_64/processor.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 6cb34a0fa200..0a5e487dbc50 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1060,9 +1060,11 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i", r); - r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i", - r); + if (kvm_check_cap(KVM_CAP_XCRS)) { + r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i", + r); + } r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs); TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i", @@ -1103,9 +1105,11 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", r); - r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i", - r); + if (kvm_check_cap(KVM_CAP_XCRS)) { + r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i", + r); + } r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs); TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i", -- cgit v1.2.1 From d012a06ab1d23178fc6856d8d2161fbcc4dd8ebd Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 15 Aug 2019 09:43:32 +0200 Subject: Revert "KVM: x86/mmu: Zap only the relevant pages when removing a memslot" This reverts commit 4e103134b862314dc2f2f18f2fb0ab972adc3f5f. Alex Williamson reported regressions with device assignment with this patch. 
Even though the bug is probably elsewhere and still latent, this is needed to fix the regression. Fixes: 4e103134b862 ("KVM: x86/mmu: Zap only the relevant pages when removing a memslot", 2019-02-05) Reported-by: Alex Willamson Cc: stable@vger.kernel.org Cc: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 33 +-------------------------------- 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 24843cf49579..218b277bfda3 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5653,38 +5653,7 @@ static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, struct kvm_page_track_notifier_node *node) { - struct kvm_mmu_page *sp; - LIST_HEAD(invalid_list); - unsigned long i; - bool flush; - gfn_t gfn; - - spin_lock(&kvm->mmu_lock); - - if (list_empty(&kvm->arch.active_mmu_pages)) - goto out_unlock; - - flush = slot_handle_all_level(kvm, slot, kvm_zap_rmapp, false); - - for (i = 0; i < slot->npages; i++) { - gfn = slot->base_gfn + i; - - for_each_valid_sp(kvm, sp, gfn) { - if (sp->gfn != gfn) - continue; - - kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); - } - if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { - kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); - flush = false; - cond_resched_lock(&kvm->mmu_lock); - } - } - kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); - -out_unlock: - spin_unlock(&kvm->mmu_lock); + kvm_mmu_zap_all(kvm); } void kvm_mmu_init_vm(struct kvm *kvm) -- cgit v1.2.1 From 0d31d4dbf38412f5b8b11b4511d07b840eebe8cb Mon Sep 17 00:00:00 2001 From: "Hodaszi, Robert" Date: Fri, 14 Jun 2019 13:16:01 +0000 Subject: Revert "cfg80211: fix processing world regdomain when non modular" This reverts commit 96cce12ff6e0 ("cfg80211: fix processing world regdomain when non modular"). Re-triggering a reg_process_hint with the last request on all events, can make the regulatory domain fail in case of multiple WiFi modules. 
On slower boards (especially with mdev), enumeration of the WiFi modules can end up in an intersected regulatory domain, and user cannot set it with 'iw reg set' anymore. This is happening, because: - 1st module enumerates, queues up a regulatory request - request gets processed by __reg_process_hint_driver(): - checks if previous was set by CORE -> yes - checks if regulatory domain changed -> yes, from '00' to e.g. 'US' -> sends request to the 'crda' - 2nd module enumerates, queues up a regulatory request (which triggers the reg_todo() work) - reg_todo() -> reg_process_pending_hints() sees, that the last request is not processed yet, so it tries to process it again. __reg_process_hint_driver() will run again, and: - checks if the last request's initiator was the core -> no, it was the driver (1st WiFi module) - checks, if the previous initiator was the driver -> yes - checks if the regulatory domain changed -> yes, it was '00' (set by core, and crda call did not return yet), and should be changed to 'US' ------> __reg_process_hint_driver calls an intersect Besides, the reg_process_hint call with the last request is meaningless since the crda call has a timeout work. If that timeout expires, the first module's request will be lost.
Cc: stable@vger.kernel.org Fixes: 96cce12ff6e0 ("cfg80211: fix processing world regdomain when non modular") Signed-off-by: Robert Hodaszi Link: https://lore.kernel.org/r/20190614131600.GA13897@a1-hr Signed-off-by: Johannes Berg --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 4831ad745f91..327479ce69f5 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2788,7 +2788,7 @@ static void reg_process_pending_hints(void) /* When last_request->processed becomes true this will be rescheduled */ if (lr && !lr->processed) { - reg_process_hint(lr); + pr_debug("Pending regulatory request, waiting for it to be processed...\n"); return; } -- cgit v1.2.1 From 3b5be16c7e90a69c93349d210766250fffcb54bd Mon Sep 17 00:00:00 2001 From: He Zhe Date: Tue, 20 Aug 2019 22:53:10 +0800 Subject: modules: page-align module section allocations only for arches supporting strict module rwx We should keep the case of "#define debug_align(X) (X)" for all arches without CONFIG_HAS_STRICT_MODULE_RWX ability, which would save people, who are sensitive to system size, a lot of memory when using modules, especially for embedded systems. This is also the intention of the original #ifdef... statement and still valid for now. Note that this still keeps the effect of the fix of the following commit, 38f054d549a8 ("modules: always page-align module section allocations"), since when CONFIG_ARCH_HAS_STRICT_MODULE_RWX is enabled, module pages are aligned. 
Signed-off-by: He Zhe Signed-off-by: Jessica Yu --- kernel/module.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/module.c b/kernel/module.c index cd8df516666d..9ee93421269c 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -64,9 +64,14 @@ /* * Modules' sections will be aligned on page boundaries - * to ensure complete separation of code and data + * to ensure complete separation of code and data, but + * only when CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y */ +#ifdef CONFIG_ARCH_HAS_STRICT_MODULE_RWX # define debug_align(X) ALIGN(X, PAGE_SIZE) +#else +# define debug_align(X) (X) +#endif /* If this is set, the section belongs in the init part of the module */ #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) -- cgit v1.2.1 From ed19e3035c5a16034e896eed28c5e72e02e2ff58 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 8 Aug 2019 19:25:47 +0300 Subject: drm/i915: Fix HW readout for crtc_clock in HDMI mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The conversion during HDMI HW readout from port_clock to crtc_clock was missed when HDMI 10bpc support was added, so fix that. v2: - Unscrew the non-HDMI case. 
Fixes: cd9e11a8bf25 ("drm/i915/icl: Add 10-bit support for hdmi") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109593 Cc: Radhakrishna Sripada Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190808162547.7009-1-imre.deak@intel.com (cherry picked from commit 2969a78aead38b49e80c821a5c683544ab16160d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_ddi.c | 4 ++-- drivers/gpu/drm/i915/intel_drv.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 7925a176f900..1cb1fa74cfbc 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -1465,8 +1465,8 @@ static void ddi_dotclock_get(struct intel_crtc_state *pipe_config) else if (intel_crtc_has_dp_encoder(pipe_config)) dotclock = intel_dotclock_calculate(pipe_config->port_clock, &pipe_config->dp_m_n); - else if (pipe_config->has_hdmi_sink && pipe_config->pipe_bpp == 36) - dotclock = pipe_config->port_clock * 2 / 3; + else if (pipe_config->has_hdmi_sink && pipe_config->pipe_bpp > 24) + dotclock = pipe_config->port_clock * 24 / pipe_config->pipe_bpp; else dotclock = pipe_config->port_clock; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 1d58f7ec5d84..f11979879e7b 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -829,7 +829,7 @@ struct intel_crtc_state { /* * Frequence the dpll for the port should run at. Differs from the - * adjusted dotclock e.g. for DP or 12bpc hdmi mode. This is also + * adjusted dotclock e.g. for DP or 10/12bpc hdmi mode. This is also * already multiplied by pixel_multiplier. 
*/ int port_clock; -- cgit v1.2.1 From 0f686ca933597cfcc0636253fc1740423c062ec7 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 9 Aug 2019 01:24:30 +0300 Subject: Revert "mmc: sdhci-tegra: drop ->get_ro() implementation" The WRITE_PROTECT bit is always in a "protected mode" on Tegra and WP-GPIO state need to be used instead. In a case of the GPIO absence, write-enable should be assumed. External SD is writable once again as a result of this patch because the offending commit changed behaviour for the case of a missing WP-GPIO to fall back to WRITE_PROTECT bit-checking, which is incorrect for Tegra. Cc: stable@vger.kernel.org # v5.1+ Fixes: e8391453e27f ("mmc: sdhci-tegra: drop ->get_ro() implementation") Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-tegra.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index f4d4761cf20a..02d8f524bb9e 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -258,6 +258,16 @@ static void tegra210_sdhci_writew(struct sdhci_host *host, u16 val, int reg) } } +static unsigned int tegra_sdhci_get_ro(struct sdhci_host *host) +{ + /* + * Write-enable shall be assumed if GPIO is missing in a board's + * device-tree because SDHCI's WRITE_PROTECT bit doesn't work on + * Tegra. 
+ */ + return mmc_gpio_get_ro(host->mmc); +} + static bool tegra_sdhci_is_pad_and_regulator_valid(struct sdhci_host *host) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); @@ -1224,6 +1234,7 @@ static const struct cqhci_host_ops sdhci_tegra_cqhci_ops = { }; static const struct sdhci_ops tegra_sdhci_ops = { + .get_ro = tegra_sdhci_get_ro, .read_w = tegra_sdhci_readw, .write_l = tegra_sdhci_writel, .set_clock = tegra_sdhci_set_clock, @@ -1279,6 +1290,7 @@ static const struct sdhci_tegra_soc_data soc_data_tegra30 = { }; static const struct sdhci_ops tegra114_sdhci_ops = { + .get_ro = tegra_sdhci_get_ro, .read_w = tegra_sdhci_readw, .write_w = tegra_sdhci_writew, .write_l = tegra_sdhci_writel, @@ -1332,6 +1344,7 @@ static const struct sdhci_tegra_soc_data soc_data_tegra124 = { }; static const struct sdhci_ops tegra210_sdhci_ops = { + .get_ro = tegra_sdhci_get_ro, .read_w = tegra_sdhci_readw, .write_w = tegra210_sdhci_writew, .write_l = tegra_sdhci_writel, @@ -1366,6 +1379,7 @@ static const struct sdhci_tegra_soc_data soc_data_tegra210 = { }; static const struct sdhci_ops tegra186_sdhci_ops = { + .get_ro = tegra_sdhci_get_ro, .read_w = tegra_sdhci_readw, .write_l = tegra_sdhci_writel, .set_clock = tegra_sdhci_set_clock, -- cgit v1.2.1 From 806ce6e2117a42528e7bb979e04e28229b34a612 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 20 Aug 2019 16:18:04 +0200 Subject: selftests/bpf: fix test_cgroup_storage on s390 test_cgroup_storage fails on s390 with an assertion failure: packets are dropped when they shouldn't. The problem is that BPF_DW packet count is accessed as BPF_W with an offset of 0, which is not correct on big-endian machines. Since the point of this test is not to verify narrow loads/stores, simply use BPF_DW when working with packet counts. 
Fixes: 68cfa3ac6b8d ("selftests/bpf: add a cgroup storage test") Fixes: 919646d2a3a9 ("selftests/bpf: extend the storage test to test per-cpu cgroup storage") Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_cgroup_storage.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c index 2fc4625c1a15..655729004391 100644 --- a/tools/testing/selftests/bpf/test_cgroup_storage.c +++ b/tools/testing/selftests/bpf/test_cgroup_storage.c @@ -20,9 +20,9 @@ int main(int argc, char **argv) BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1), - BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */ BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */ @@ -30,7 +30,7 @@ int main(int argc, char **argv) BPF_FUNC_get_local_storage), BPF_MOV64_IMM(BPF_REG_1, 1), BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x1), BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), -- cgit v1.2.1 From e91dcb536ae263ecff07118e36bf820c229a6ecd Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 19 Aug 2019 14:38:47 +0200 Subject: selftests/bpf: fix test_btf_dump with O= test_btf_dump fails when run with O=, because it needs to access source files and assumes they live in ./progs/, which is not the case in this scenario. Fix by instructing kselftest to copy btf_dump_test_case_*.c files to the test directory. Since kselftest does not preserve directory structure, adjust the test to look in ./progs/ and then in ./. 
Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 3 +++ tools/testing/selftests/bpf/test_btf_dump.c | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index c085964e1d05..69b98d8d3b5b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -34,6 +34,9 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c))) TEST_GEN_FILES = $(BPF_OBJ_FILES) +BTF_C_FILES = $(wildcard progs/btf_dump_test_case_*.c) +TEST_FILES = $(BTF_C_FILES) + # Also test sub-register code-gen if LLVM has eBPF v3 processor support which # contains both ALU32 and JMP32 instructions. SUBREG_CODEGEN := $(shell echo "int cal(int a) { return a > 0; }" | \ diff --git a/tools/testing/selftests/bpf/test_btf_dump.c b/tools/testing/selftests/bpf/test_btf_dump.c index 8f850823d35f..6e75dd3cb14f 100644 --- a/tools/testing/selftests/bpf/test_btf_dump.c +++ b/tools/testing/selftests/bpf/test_btf_dump.c @@ -97,6 +97,13 @@ int test_btf_dump_case(int n, struct btf_dump_test_case *test_case) } snprintf(test_file, sizeof(test_file), "progs/%s.c", test_case->name); + if (access(test_file, R_OK) == -1) + /* + * When the test is run with O=, kselftest copies TEST_FILES + * without preserving the directory structure. + */ + snprintf(test_file, sizeof(test_file), "%s.c", + test_case->name); /* * Diff test output and expected test output, contained between * START-EXPECTED-OUTPUT and END-EXPECTED-OUTPUT lines in test case. 
-- cgit v1.2.1 From 0604409df9e04cdec7b08d471c8c1c0c10b5554d Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 20 Aug 2019 15:41:34 +0200 Subject: selftests/bpf: add config fragment BPF_JIT When running test_kmod.sh the following shows up # sysctl cannot stat /proc/sys/net/core/bpf_jit_enable No such file or directory cannot: stat_/proc/sys/net/core/bpf_jit_enable # # sysctl cannot stat /proc/sys/net/core/bpf_jit_harden No such file or directory cannot: stat_/proc/sys/net/core/bpf_jit_harden # Rework to enable CONFIG_BPF_JIT to solve "No such file or directory" Signed-off-by: Anders Roxell Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index f7a0744db31e..5dc109f4c097 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -34,3 +34,4 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_IPV6_SIT=m +CONFIG_BPF_JIT=y -- cgit v1.2.1 From 3035bb72ee47d494c041465b4add9c6407c832ed Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 20 Aug 2019 15:41:21 +0200 Subject: selftests/bpf: install files test_xdp_vlan.sh When ./test_xdp_vlan_mode_generic.sh runs it complains that it can't find file test_xdp_vlan.sh. # selftests: bpf: test_xdp_vlan_mode_generic.sh # ./test_xdp_vlan_mode_generic.sh: line 9: ./test_xdp_vlan.sh: No such file or directory Rework so that test_xdp_vlan.sh gets installed, added to the variable TEST_PROGS_EXTENDED. 
Fixes: d35661fcf95d ("selftests/bpf: add wrapper scripts for test_xdp_vlan.sh") Signed-off-by: Anders Roxell Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 69b98d8d3b5b..96752ebd938f 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -71,7 +71,8 @@ TEST_PROGS := test_kmod.sh \ TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh \ tcp_client.py \ - tcp_server.py + tcp_server.py \ + test_xdp_vlan.sh # Compile but not part of 'make run_tests' TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \ -- cgit v1.2.1 From 08c04c84a5cde3af9baac0645a7496d6dcd76822 Mon Sep 17 00:00:00 2001 From: Bryan Gurney Date: Fri, 16 Aug 2019 10:09:53 -0400 Subject: dm dust: use dust block size for badblocklist index Change the "frontend" dust_remove_block, dust_add_block, and dust_query_block functions to store the "dust block number", instead of the sector number corresponding to the "dust block number". For the "backend" functions dust_map_read and dust_map_write, right-shift by sect_per_block_shift. This fixes the inability to emulate failure beyond the first sector of each "dust block" (for devices with a "dust block size" larger than 512 bytes). 
Fixes: e4f3fabd67480bf ("dm: add dust target") Cc: stable@vger.kernel.org Signed-off-by: Bryan Gurney Signed-off-by: Mike Snitzer --- drivers/md/dm-dust.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-dust.c b/drivers/md/dm-dust.c index 845f376a72d9..8288887b7f94 100644 --- a/drivers/md/dm-dust.c +++ b/drivers/md/dm-dust.c @@ -25,6 +25,7 @@ struct dust_device { unsigned long long badblock_count; spinlock_t dust_lock; unsigned int blksz; + int sect_per_block_shift; unsigned int sect_per_block; sector_t start; bool fail_read_on_bb:1; @@ -79,7 +80,7 @@ static int dust_remove_block(struct dust_device *dd, unsigned long long block) unsigned long flags; spin_lock_irqsave(&dd->dust_lock, flags); - bblock = dust_rb_search(&dd->badblocklist, block * dd->sect_per_block); + bblock = dust_rb_search(&dd->badblocklist, block); if (bblock == NULL) { if (!dd->quiet_mode) { @@ -113,7 +114,7 @@ static int dust_add_block(struct dust_device *dd, unsigned long long block) } spin_lock_irqsave(&dd->dust_lock, flags); - bblock->bb = block * dd->sect_per_block; + bblock->bb = block; if (!dust_rb_insert(&dd->badblocklist, bblock)) { if (!dd->quiet_mode) { DMERR("%s: block %llu already in badblocklist", @@ -138,7 +139,7 @@ static int dust_query_block(struct dust_device *dd, unsigned long long block) unsigned long flags; spin_lock_irqsave(&dd->dust_lock, flags); - bblock = dust_rb_search(&dd->badblocklist, block * dd->sect_per_block); + bblock = dust_rb_search(&dd->badblocklist, block); if (bblock != NULL) DMINFO("%s: block %llu found in badblocklist", __func__, block); else @@ -165,6 +166,7 @@ static int dust_map_read(struct dust_device *dd, sector_t thisblock, int ret = DM_MAPIO_REMAPPED; if (fail_read_on_bb) { + thisblock >>= dd->sect_per_block_shift; spin_lock_irqsave(&dd->dust_lock, flags); ret = __dust_map_read(dd, thisblock); spin_unlock_irqrestore(&dd->dust_lock, flags); @@ -195,6 +197,7 @@ static int dust_map_write(struct dust_device *dd, 
sector_t thisblock, unsigned long flags; if (fail_read_on_bb) { + thisblock >>= dd->sect_per_block_shift; spin_lock_irqsave(&dd->dust_lock, flags); __dust_map_write(dd, thisblock); spin_unlock_irqrestore(&dd->dust_lock, flags); @@ -331,6 +334,8 @@ static int dust_ctr(struct dm_target *ti, unsigned int argc, char **argv) dd->blksz = blksz; dd->start = tmp; + dd->sect_per_block_shift = __ffs(sect_per_block); + /* * Whether to fail a read on a "bad" block. * Defaults to false; enabled later by message. -- cgit v1.2.1 From e0702d90b79d430b0ccc276ead4f88440bb51352 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 19 Aug 2019 12:58:14 +0300 Subject: dm zoned: fix potential NULL dereference in dmz_do_reclaim() This function is supposed to return error pointers so it matches the dmz_get_rnd_zone_for_reclaim() function. The current code could lead to a NULL dereference in dmz_do_reclaim() Fixes: b234c6d7a703 ("dm zoned: improve error handling in reclaim") Signed-off-by: Dan Carpenter Reviewed-by: Dmitry Fomichev Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 2a5bc51fd6d5..595a73110e17 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1588,7 +1588,7 @@ static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd) struct dm_zone *zone; if (list_empty(&zmd->map_seq_list)) - return NULL; + return ERR_PTR(-EBUSY); list_for_each_entry(zone, &zmd->map_seq_list, link) { if (!zone->bzone) @@ -1597,7 +1597,7 @@ static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd) return zone; } - return NULL; + return ERR_PTR(-EBUSY); } /* -- cgit v1.2.1 From dc1a3e8e0cc6b2293b48c044710e63395aeb4fb4 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Sun, 18 Aug 2019 19:18:34 -0500 Subject: dm raid: add missing cleanup in raid_ctr() If rs_prepare_reshape() fails, no 
cleanup is executed, leading to leak of the raid_set structure allocated at the beginning of raid_ctr(). To fix this issue, go to the label 'bad' if the error occurs. Fixes: 11e4723206683 ("dm raid: stop keeping raid set frozen altogether") Cc: stable@vger.kernel.org Signed-off-by: Wenwen Wang Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 8a60a4a070ac..1f933dd197cd 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3194,7 +3194,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) */ r = rs_prepare_reshape(rs); if (r) - return r; + goto bad; /* Reshaping ain't recovery, so disable recovery */ rs_setup_recovery(rs, MaxSector); -- cgit v1.2.1 From 3afa758cfb6efe0dc94dd8500add7222f503bfa5 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 20 Aug 2019 21:16:27 -0500 Subject: usb: udc: lpc32xx: silence fall-through warning Silence the following fall-through warning by adding a break statement: drivers/usb/gadget/udc/lpc32xx_udc.c:2230:3: warning: this statement may fall through [-Wimplicit-fallthrough=] Signed-off-by: Gustavo A. R. 
Silva Link: https://lore.kernel.org/r/20190821021627.GA2679@embeddedor Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/lpc32xx_udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/lpc32xx_udc.c b/drivers/usb/gadget/udc/lpc32xx_udc.c index 5f1b14f3e5a0..bb6af6b5ac97 100644 --- a/drivers/usb/gadget/udc/lpc32xx_udc.c +++ b/drivers/usb/gadget/udc/lpc32xx_udc.c @@ -2265,7 +2265,7 @@ static void udc_handle_ep0_setup(struct lpc32xx_udc *udc) default: break; } - + break; case USB_REQ_SET_ADDRESS: if (reqtype == (USB_TYPE_STANDARD | USB_RECIP_DEVICE)) { -- cgit v1.2.1 From de7b9aa633b693e77942e12f1769506efae6917b Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 20 Aug 2019 11:28:25 +0200 Subject: usbtmc: more sanity checking for packet size A malicious device can make the driver divide by zero with a nonsense maximum packet size. Signed-off-by: Oliver Neukum Cc: stable Link: https://lore.kernel.org/r/20190820092826.17694-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 4942122b2346..36858ddd8d9b 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -2362,8 +2362,11 @@ static int usbtmc_probe(struct usb_interface *intf, goto err_put; } + retcode = -EINVAL; data->bulk_in = bulk_in->bEndpointAddress; data->wMaxPacketSize = usb_endpoint_maxp(bulk_in); + if (!data->wMaxPacketSize) + goto err_put; dev_dbg(&intf->dev, "Found bulk in endpoint at %u\n", data->bulk_in); data->bulk_out = bulk_out->bEndpointAddress; -- cgit v1.2.1 From cbe85c88ce80fb92956a0793518d415864dcead8 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 20 Aug 2019 02:07:58 +0000 Subject: usb: chipidea: udc: don't do hardware access if gadget has stopped After _gadget_stop_activity is executed, we can consider the hardware operation for gadget has finished, and the udc can be 
stopped and enter low power mode. So, any later hardware operations (from usb_ep_ops APIs or usb_gadget_ops APIs) should be considered invalid, any deinitializatons has been covered at _gadget_stop_activity. I meet this problem when I plug out usb cable from PC using mass_storage gadget, my callstack like: vbus interrupt->.vbus_session-> composite_disconnect ->pm_runtime_put_sync(&_gadget->dev), the composite_disconnect will call fsg_disable, but fsg_disable calls usb_ep_disable using async way, there are register accesses for usb_ep_disable. So sometimes, I get system hang due to visit register without clock, sometimes not. The Linux Kernel USB maintainer Alan Stern suggests this kinds of solution. See: http://marc.info/?l=linux-usb&m=138541769810983&w=2. Cc: #v4.9+ Signed-off-by: Peter Chen Link: https://lore.kernel.org/r/20190820020503.27080-2-peter.chen@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/udc.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 6a5ee8e6da10..67ad40b0a05b 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -709,12 +709,6 @@ static int _gadget_stop_activity(struct usb_gadget *gadget) struct ci_hdrc *ci = container_of(gadget, struct ci_hdrc, gadget); unsigned long flags; - spin_lock_irqsave(&ci->lock, flags); - ci->gadget.speed = USB_SPEED_UNKNOWN; - ci->remote_wakeup = 0; - ci->suspended = 0; - spin_unlock_irqrestore(&ci->lock, flags); - /* flush all endpoints */ gadget_for_each_ep(ep, gadget) { usb_ep_fifo_flush(ep); @@ -732,6 +726,12 @@ static int _gadget_stop_activity(struct usb_gadget *gadget) ci->status = NULL; } + spin_lock_irqsave(&ci->lock, flags); + ci->gadget.speed = USB_SPEED_UNKNOWN; + ci->remote_wakeup = 0; + ci->suspended = 0; + spin_unlock_irqrestore(&ci->lock, flags); + return 0; } @@ -1303,6 +1303,10 @@ static int ep_disable(struct usb_ep *ep) return -EBUSY; 
spin_lock_irqsave(hwep->lock, flags); + if (hwep->ci->gadget.speed == USB_SPEED_UNKNOWN) { + spin_unlock_irqrestore(hwep->lock, flags); + return 0; + } /* only internal SW should disable ctrl endpts */ @@ -1392,6 +1396,10 @@ static int ep_queue(struct usb_ep *ep, struct usb_request *req, return -EINVAL; spin_lock_irqsave(hwep->lock, flags); + if (hwep->ci->gadget.speed == USB_SPEED_UNKNOWN) { + spin_unlock_irqrestore(hwep->lock, flags); + return 0; + } retval = _ep_queue(ep, req, gfp_flags); spin_unlock_irqrestore(hwep->lock, flags); return retval; @@ -1415,8 +1423,8 @@ static int ep_dequeue(struct usb_ep *ep, struct usb_request *req) return -EINVAL; spin_lock_irqsave(hwep->lock, flags); - - hw_ep_flush(hwep->ci, hwep->num, hwep->dir); + if (hwep->ci->gadget.speed != USB_SPEED_UNKNOWN) + hw_ep_flush(hwep->ci, hwep->num, hwep->dir); list_for_each_entry_safe(node, tmpnode, &hwreq->tds, td) { dma_pool_free(hwep->td_pool, node->ptr, node->dma); @@ -1487,6 +1495,10 @@ static void ep_fifo_flush(struct usb_ep *ep) } spin_lock_irqsave(hwep->lock, flags); + if (hwep->ci->gadget.speed == USB_SPEED_UNKNOWN) { + spin_unlock_irqrestore(hwep->lock, flags); + return; + } hw_ep_flush(hwep->ci, hwep->num, hwep->dir); @@ -1559,6 +1571,10 @@ static int ci_udc_wakeup(struct usb_gadget *_gadget) int ret = 0; spin_lock_irqsave(&ci->lock, flags); + if (ci->gadget.speed == USB_SPEED_UNKNOWN) { + spin_unlock_irqrestore(&ci->lock, flags); + return 0; + } if (!ci->remote_wakeup) { ret = -EOPNOTSUPP; goto out; -- cgit v1.2.1 From 08d676d1685c2a29e4d0e1b0242324e564d4589e Mon Sep 17 00:00:00 2001 From: Henk van der Laan Date: Fri, 16 Aug 2019 22:08:47 +0200 Subject: usb-storage: Add new JMS567 revision to unusual_devs Revision 0x0117 suffers from an identical issue to earlier revisions, therefore it should be added to the quirks list. 
Signed-off-by: Henk van der Laan Cc: stable Link: https://lore.kernel.org/r/20190816200847.21366-1-opensource@henkvdlaan.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index ea0d27a94afe..1cd9b6305b06 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -2100,7 +2100,7 @@ UNUSUAL_DEV( 0x14cd, 0x6600, 0x0201, 0x0201, US_FL_IGNORE_RESIDUE ), /* Reported by Michael Büsch */ -UNUSUAL_DEV( 0x152d, 0x0567, 0x0114, 0x0116, +UNUSUAL_DEV( 0x152d, 0x0567, 0x0114, 0x0117, "JMicron", "USB to ATA/ATAPI Bridge", USB_SC_DEVICE, USB_PR_DEVICE, NULL, -- cgit v1.2.1 From e4427372398c31f57450565de277f861a4db5b3b Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 10 Jun 2019 19:22:55 +0200 Subject: selftests/kvm: make platform_info_test pass on AMD test_msr_platform_info_disabled() generates EXIT_SHUTDOWN but VMCB state is undefined after that so an attempt to launch this guest again from test_msr_platform_info_enabled() fails. Reorder the tests to make test pass. 
Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/platform_info_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index 40050e44ec0a..f9334bd3cce9 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -99,8 +99,8 @@ int main(int argc, char *argv[]) msr_platform_info = vcpu_get_msr(vm, VCPU_ID, MSR_PLATFORM_INFO); vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO); - test_msr_platform_info_disabled(vm); test_msr_platform_info_enabled(vm); + test_msr_platform_info_disabled(vm); vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info); kvm_vm_free(vm); -- cgit v1.2.1 From 1bc8d18c75fef3b478dbdfef722aae09e2a9fde7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 21 Aug 2019 20:00:02 +0200 Subject: ALSA: line6: Fix memory leak at line6_init_pcm() error path I forgot to release the allocated object at the early error path in line6_init_pcm(). For addressing it, slightly shuffle the code so that the PCM destructor (pcm->private_free) is assigned properly before all error paths. 
Fixes: 3450121997ce ("ALSA: line6: Fix write on zero-sized buffer") Cc: Signed-off-by: Takashi Iwai --- sound/usb/line6/pcm.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sound/usb/line6/pcm.c b/sound/usb/line6/pcm.c index 2c03e0f6bf72..f70211e6b174 100644 --- a/sound/usb/line6/pcm.c +++ b/sound/usb/line6/pcm.c @@ -550,6 +550,15 @@ int line6_init_pcm(struct usb_line6 *line6, line6pcm->volume_monitor = 255; line6pcm->line6 = line6; + spin_lock_init(&line6pcm->out.lock); + spin_lock_init(&line6pcm->in.lock); + line6pcm->impulse_period = LINE6_IMPULSE_DEFAULT_PERIOD; + + line6->line6pcm = line6pcm; + + pcm->private_data = line6pcm; + pcm->private_free = line6_cleanup_pcm; + line6pcm->max_packet_size_in = usb_maxpacket(line6->usbdev, usb_rcvisocpipe(line6->usbdev, ep_read), 0); @@ -562,15 +571,6 @@ int line6_init_pcm(struct usb_line6 *line6, return -EINVAL; } - spin_lock_init(&line6pcm->out.lock); - spin_lock_init(&line6pcm->in.lock); - line6pcm->impulse_period = LINE6_IMPULSE_DEFAULT_PERIOD; - - line6->line6pcm = line6pcm; - - pcm->private_data = line6pcm; - pcm->private_free = line6_cleanup_pcm; - err = line6_create_audio_out_urbs(line6pcm); if (err < 0) return err; -- cgit v1.2.1 From aad12c2394189f606ce0308ab65505fdd9081a10 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 21 Aug 2019 14:29:29 +0300 Subject: trivial: netns: fix typo in 'struct net.passive' description Replace 'decided' with 'decide' so that comment would be /* To decide when the network namespace should be freed. */ Signed-off-by: Mike Rapoport Signed-off-by: David S. 
Miller --- include/net/net_namespace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index cb668bc2692d..ab40d7afdc54 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -52,7 +52,7 @@ struct bpf_prog; #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) struct net { - refcount_t passive; /* To decided when the network + refcount_t passive; /* To decide when the network * namespace should be freed. */ refcount_t count; /* To decided when the network -- cgit v1.2.1 From 7846f58fba964af7cb8cf77d4d13c33254725211 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Wed, 21 Aug 2019 12:25:13 -0700 Subject: x86/boot: Fix boot regression caused by bootparam sanitizing commit a90118c445cc ("x86/boot: Save fields explicitly, zero out everything else") had two errors: * It preserved boot_params.acpi_rsdp_addr, and * It failed to preserve boot_params.hdr Therefore, zero out acpi_rsdp_addr, and preserve hdr. 
Fixes: a90118c445cc ("x86/boot: Save fields explicitly, zero out everything else") Reported-by: Neil MacLeod Suggested-by: Thomas Gleixner Signed-off-by: John Hubbard Signed-off-by: Thomas Gleixner Tested-by: Neil MacLeod Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190821192513.20126-1-jhubbard@nvidia.com --- arch/x86/include/asm/bootparam_utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index f5e90a849bca..9e5f3c722c33 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -59,7 +59,6 @@ static void sanitize_boot_params(struct boot_params *boot_params) BOOT_PARAM_PRESERVE(apm_bios_info), BOOT_PARAM_PRESERVE(tboot_addr), BOOT_PARAM_PRESERVE(ist_info), - BOOT_PARAM_PRESERVE(acpi_rsdp_addr), BOOT_PARAM_PRESERVE(hd0_info), BOOT_PARAM_PRESERVE(hd1_info), BOOT_PARAM_PRESERVE(sys_desc_table), @@ -71,6 +70,7 @@ static void sanitize_boot_params(struct boot_params *boot_params) BOOT_PARAM_PRESERVE(eddbuf_entries), BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries), BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer), + BOOT_PARAM_PRESERVE(hdr), BOOT_PARAM_PRESERVE(e820_table), BOOT_PARAM_PRESERVE(eddbuf), }; -- cgit v1.2.1 From 2d683eaaeeb9d33d23674ae635e0ef1448523d18 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 21 Aug 2019 16:41:23 +0200 Subject: net: cpsw: fix NULL pointer exception in the probe error path In certain cases when the probe function fails the error path calls cpsw_remove_dt() before calling platform_set_drvdata(). This is an issue as cpsw_remove_dt() uses platform_get_drvdata() to retrieve the cpsw_common data and leads to a NULL pointer exception. This patch fixes it by calling platform_set_drvdata() earlier in the probe. 
Fixes: 83a8471ba255 ("net: ethernet: ti: cpsw: refactor probe to group common hw initialization") Reported-by: Maxime Chevallier Signed-off-by: Antoine Tenart Reviewed-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 32a89744972d..a46b8b2e44e1 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2775,6 +2775,7 @@ static int cpsw_probe(struct platform_device *pdev) if (!cpsw) return -ENOMEM; + platform_set_drvdata(pdev, cpsw); cpsw->dev = dev; mode = devm_gpiod_get_array_optional(dev, "mode", GPIOD_OUT_LOW); @@ -2879,7 +2880,6 @@ static int cpsw_probe(struct platform_device *pdev) goto clean_cpts; } - platform_set_drvdata(pdev, cpsw); priv = netdev_priv(ndev); priv->cpsw = cpsw; priv->ndev = ndev; -- cgit v1.2.1 From 98f58ada2d37e68125c056f1fc005748251879c2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 15 Aug 2019 08:27:09 -0500 Subject: drm/amdgpu/gfx9: update pg_flags after determining if gfx off is possible We need to set certain power gating flags after we determine if the firmware version is sufficient to support gfxoff. Previously we set the pg flags in early init, but later we might have disabled gfxoff if the firmware versions didn't support it. Move adding the additional pg flags after we determine whether or not to support gfxoff.
Fixes: 005440066f92 ("drm/amdgpu: enable gfxoff again on raven series (v2)") Tested-by: Kai-Heng Feng Tested-by: Tom St Denis Signed-off-by: Alex Deucher Cc: Kai-Heng Feng Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/soc15.c | 5 ----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 04b8ac4432c7..4ea67f94cae2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -604,6 +604,10 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) (adev->gfx.rlc_feature_version < 1) || !adev->gfx.rlc.is_rlc_v2_1) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + if (adev->pm.pp_feature & PP_GFXOFF_MASK) + adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_RLC_SMU_HS; break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 23265414d448..04fbf05d7176 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -992,11 +992,6 @@ static int soc15_common_early_init(void *handle) adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; } - - if (adev->pm.pp_feature & PP_GFXOFF_MASK) - adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | - AMD_PG_SUPPORT_CP | - AMD_PG_SUPPORT_RLC_SMU_HS; break; default: /* FIXME: not supported yet */ -- cgit v1.2.1 From 00430144ff7343369222a110985aaa6726fb26e0 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Mon, 19 Aug 2019 23:38:02 +0800 Subject: drm/amd/powerplay: fix variable type errors in smu_v11_0_setup_pptable Fix size type errors, from uint32_t to uint16_t. The mismatch causes only the highest 16 bits to be initialized in the smu_get_atom_data_table function. Bug report: This fixes the following static checker warning.
drivers/gpu/drm/amd/amdgpu/../powerplay/smu_v11_0.c:390 smu_v11_0_setup_pptable() warn: passing casted pointer '&size' to 'smu_get_atom_data_table()' 32 vs 16. Signed-off-by: Kevin Wang Reported-by: Dan Carpenter Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index 5fde5cf65b42..3ac061a3c3c5 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -327,6 +327,7 @@ static int smu_v11_0_setup_pptable(struct smu_context *smu) const struct smc_firmware_header_v1_0 *hdr; int ret, index; uint32_t size; + uint16_t atom_table_size; uint8_t frev, crev; void *table; uint16_t version_major, version_minor; @@ -354,10 +355,11 @@ static int smu_v11_0_setup_pptable(struct smu_context *smu) index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, powerplayinfo); - ret = smu_get_atom_data_table(smu, index, (uint16_t *)&size, &frev, &crev, + ret = smu_get_atom_data_table(smu, index, &atom_table_size, &frev, &crev, (uint8_t **)&table); if (ret) return ret; + size = atom_table_size; } if (!smu->smu_table.power_play_table) -- cgit v1.2.1 From 155f85c0d56896552439fd4fb5f43dfc4e9f842a Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Tue, 20 Aug 2019 13:28:51 +0800 Subject: drm/amd/powerplay: remove duplicate macro smu_get_uclk_dpm_states in amdgpu_smu.h remove duplicate macro smu_get_uclk_dpm_states in amdgpu_smu.h " #define smu_get_uclk_dpm_states(smu, clocks_in_khz, num_states) \ ((smu)->ppt_funcs->get_uclk_dpm_states ? (smu)->ppt_funcs->get_uclk_dpm_states((smu), (clocks_in_khz), (num_states)) : 0) #define smu_get_max_sustainable_clocks_by_dc(smu, max_clocks) \ ((smu)->funcs->get_max_sustainable_clocks_by_dc ? 
(smu)->funcs->get_max_sustainable_clocks_by_dc((smu), (max_clocks)) : 0) #define smu_get_uclk_dpm_states(smu, clocks_in_khz, num_states) \ ((smu)->ppt_funcs->get_uclk_dpm_states ? (smu)->ppt_funcs->get_uclk_dpm_states((smu), (clocks_in_khz), (num_states)) : 0) " Signed-off-by: Kevin Wang Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index a0f52c86d8c7..a78b2e295895 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -907,8 +907,6 @@ struct smu_funcs ((smu)->funcs->register_irq_handler ? (smu)->funcs->register_irq_handler(smu) : 0) #define smu_set_azalia_d3_pme(smu) \ ((smu)->funcs->set_azalia_d3_pme ? (smu)->funcs->set_azalia_d3_pme((smu)) : 0) -#define smu_get_uclk_dpm_states(smu, clocks_in_khz, num_states) \ - ((smu)->ppt_funcs->get_uclk_dpm_states ? (smu)->ppt_funcs->get_uclk_dpm_states((smu), (clocks_in_khz), (num_states)) : 0) #define smu_get_max_sustainable_clocks_by_dc(smu, max_clocks) \ ((smu)->funcs->get_max_sustainable_clocks_by_dc ? (smu)->funcs->get_max_sustainable_clocks_by_dc((smu), (max_clocks)) : 0) #define smu_get_uclk_dpm_states(smu, clocks_in_khz, num_states) \ -- cgit v1.2.1 From 221a2bdbd5d3871a5f41d912b2f06cc02e8f8b38 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Tue, 20 Aug 2019 15:11:37 +0800 Subject: drm/amd/amdgpu: disable MMHUB PG for navi10 Disable MMHUB PG for navi10 according to the production requirement. 
Signed-off-by: Kenneth Feng Reviewed-by: Hawking Zhang Reviewed-by: Kevin Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 662612f89c70..9922bce3fd89 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -552,7 +552,6 @@ static int nv_common_early_init(void *handle) AMD_CG_SUPPORT_BIF_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | - AMD_PG_SUPPORT_MMHUB | AMD_PG_SUPPORT_ATHUB; adev->external_rev_id = adev->rev_id + 0x1; break; -- cgit v1.2.1 From 1a701ea924815b0518733aa8d5d05c1f6fa87062 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Tue, 20 Aug 2019 15:39:53 +0200 Subject: drm/amdgpu: prevent memory leaks in AMDGPU_CS ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Error out if the AMDGPU_CS ioctl is called with multiple SYNCOBJ_OUT and/or TIMELINE_SIGNAL chunks, since otherwise the last chunk wins while the allocated array as well as the reference counts of sync objects are leaked. 
Signed-off-by: Nicolai Hähnle Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 4e4094f842e7..8b26c970a3cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1143,6 +1143,9 @@ static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, num_deps = chunk->length_dw * 4 / sizeof(struct drm_amdgpu_cs_chunk_sem); + if (p->post_deps) + return -EINVAL; + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), GFP_KERNEL); p->num_post_deps = 0; @@ -1166,8 +1169,7 @@ static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk - *chunk) + struct amdgpu_cs_chunk *chunk) { struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; unsigned num_deps; @@ -1177,6 +1179,9 @@ static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p num_deps = chunk->length_dw * 4 / sizeof(struct drm_amdgpu_cs_chunk_syncobj); + if (p->post_deps) + return -EINVAL; + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), GFP_KERNEL); p->num_post_deps = 0; -- cgit v1.2.1 From ec6e491353b9024d4b1a65c48b21e3bc0faeae4e Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 21 Aug 2019 11:27:13 -0400 Subject: drm/amd/display: Calculate bpc based on max_requested_bpc [Why] The only place where state->max_bpc is updated on the connector is at the start of atomic check during drm_atomic_connector_check. It isn't updated when adding the connectors to the atomic state after the fact. It also doesn't necessarily reflect the right value when called in amdgpu during mode validation outside of atomic check. 
This can cause the wrong bpc to be used even if the max_requested_bpc is the correct value. [How] Don't rely on state->max_bpc reflecting the real bpc value and just do the min(...) based on display info bpc and max_requested_bpc. Fixes: 01933ba42d3d ("drm/amd/display: Use current connector state if NULL when checking bpc") Signed-off-by: Nicholas Kazlauskas Reviewed-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 4a29f72334d0..45be7a2132bb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3131,13 +3131,25 @@ static enum dc_color_depth convert_color_depth_from_display_info(const struct drm_connector *connector, const struct drm_connector_state *state) { - uint32_t bpc = connector->display_info.bpc; + uint8_t bpc = (uint8_t)connector->display_info.bpc; + + /* Assume 8 bpc by default if no bpc is specified. */ + bpc = bpc ? bpc : 8; if (!state) state = connector->state; if (state) { - bpc = state->max_bpc; + /* + * Cap display bpc based on the user requested value. + * + * The value for state->max_bpc may not correctly updated + * depending on when the connector gets added to the state + * or if this was called outside of atomic check, so it + * can't be used directly. + */ + bpc = min(bpc, state->max_requested_bpc); + /* Round down to the nearest even number. */ bpc = bpc - (bpc & 1); } -- cgit v1.2.1 From 738a2e4b1774fe0d20d6c027a7cbafb6a1619675 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 21 Aug 2019 17:07:46 -0700 Subject: net: dsa: bcm_sf2: Do not configure PHYLINK on CPU port The SF2 binding does not specify that the CPU port should have properties mandatory for successfully instantiating a PHYLINK object. 
As such, there will be missing properties (including fixed-link) and when attempting to validate and later configure link modes, we will have an incorrect set of parameters (interface, speed, duplex). Simply prevent the CPU port from being configured through PHYLINK since bcm_sf2_imp_setup() takes care of that already. Fixes: 0e27921816ad ("net: dsa: Use PHYLINK for the CPU/DSA ports") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 3811fdbda13e..28c963a21dac 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -478,6 +478,7 @@ static void bcm_sf2_sw_validate(struct dsa_switch *ds, int port, unsigned long *supported, struct phylink_link_state *state) { + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; if (!phy_interface_mode_is_rgmii(state->interface) && @@ -487,8 +488,10 @@ static void bcm_sf2_sw_validate(struct dsa_switch *ds, int port, state->interface != PHY_INTERFACE_MODE_INTERNAL && state->interface != PHY_INTERFACE_MODE_MOCA) { bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); - dev_err(ds->dev, - "Unsupported interface: %d\n", state->interface); + if (port != core_readl(priv, CORE_IMP0_PRT_ID)) + dev_err(ds->dev, + "Unsupported interface: %d for port %d\n", + state->interface, port); return; } @@ -526,6 +529,9 @@ static void bcm_sf2_sw_mac_config(struct dsa_switch *ds, int port, u32 id_mode_dis = 0, port_mode; u32 reg, offset; + if (port == core_readl(priv, CORE_IMP0_PRT_ID)) + return; + if (priv->type == BCM7445_DEVICE_ID) offset = CORE_STS_OVERRIDE_GMIIP_PORT(port); else -- cgit v1.2.1 From f17f7648a49aa6728649ddf79bdbcac4f1970ce4 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 20 Aug 2019 10:19:47 +0800 Subject: ipv6/addrconf: allow adding multicast addr if IFA_F_MCAUTOJOIN is set In commit 
93a714d6b53d ("multicast: Extend ip address command to enable multicast group join/leave on") we added a new flag IFA_F_MCAUTOJOIN to let users add a multicast address on an Ethernet interface. This works for IPv4, but not for IPv6. See the inet6_addr_add code. static int inet6_addr_add() { ... if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) { ipv6_mc_config(net->ipv6.mc_autojoin_sk, true...) } ifp = ipv6_add_addr(idev, cfg, true, extack); <- always fail with maddr if (!IS_ERR(ifp)) { ... } else if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) { ipv6_mc_config(net->ipv6.mc_autojoin_sk, false...) } } But ipv6_add_addr() checks the address type and rejects multicast addresses directly. So this feature never worked for IPv6. We should not remove the multicast address check totally in ipv6_add_addr(), but could accept a multicast address only when the IFA_F_MCAUTOJOIN flag is supplied. v2: update commit description Fixes: 93a714d6b53d ("multicast: Extend ip address command to enable multicast group join/leave on") Reported-by: Jianlin Shi Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index dc73888c7859..ced995f3fec4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1045,7 +1045,8 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, int err = 0; if (addr_type == IPV6_ADDR_ANY || - addr_type & IPV6_ADDR_MULTICAST || + (addr_type & IPV6_ADDR_MULTICAST && + !(cfg->ifa_flags & IFA_F_MCAUTOJOIN)) || (!(idev->dev->flags & IFF_LOOPBACK) && !netif_is_l3_master(idev->dev) && addr_type & IPV6_ADDR_LOOPBACK)) -- cgit v1.2.1 From 0f404bbdaf1624f4d25dd67da7ff85eab005beac Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 20 Aug 2019 10:46:00 +0800 Subject: net: fix icmp_socket_deliver argument 2 input It expects an unsigned int, but got a __be32 Signed-off-by: Li RongQing Signed-off-by: Zhang Yu Signed-off-by: David S.
Miller --- net/ipv4/icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 1510e951f451..bf7b5d45de99 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -902,7 +902,7 @@ static bool icmp_redirect(struct sk_buff *skb) return false; } - icmp_socket_deliver(skb, icmp_hdr(skb)->un.gateway); + icmp_socket_deliver(skb, ntohl(icmp_hdr(skb)->un.gateway)); return true; } -- cgit v1.2.1 From cc07db5a5b100bc8eaab5097a23d72f858979750 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 20 Aug 2019 12:11:44 +0300 Subject: gve: Copy and paste bug in gve_get_stats() There is a copy and paste error so we have "rx" where "tx" was intended in the priv->tx[] array. Fixes: f5cedc84a30d ("gve: Add transmit and receive support") Signed-off-by: Dan Carpenter Reviewed-by: Catherine Sullivan Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 497298752381..aca95f64bde8 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -50,7 +50,7 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) u64_stats_fetch_begin(&priv->tx[ring].statss); s->tx_packets += priv->tx[ring].pkt_done; s->tx_bytes += priv->tx[ring].bytes_done; - } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + } while (u64_stats_fetch_retry(&priv->tx[ring].statss, start)); } } -- cgit v1.2.1 From 7035eef4496d95b69b0bc18e0bced09304e0afdf Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 21 Aug 2019 11:45:25 -0700 Subject: md: update MAINTAINERS info I have been reviewing patches for md in the past few months. Mark me as the MD maintainer, as I have effectively been filling that role. 
Cc: NeilBrown Signed-off-by: Song Liu Signed-off-by: Jens Axboe --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 43604d6ab96c..eae4e0d1117a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14882,9 +14882,9 @@ F: include/linux/arm_sdei.h F: include/uapi/linux/arm_sdei.h SOFTWARE RAID (Multiple Disks) SUPPORT -M: Shaohua Li +M: Song Liu L: linux-raid@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/song/md.git S: Supported F: drivers/md/Makefile F: drivers/md/Kconfig -- cgit v1.2.1 From 5871cd93692c8071fb9358daccb715b5081316ac Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Mon, 19 Aug 2019 22:23:27 +0000 Subject: crypto: ccp - Ignore unconfigured CCP device on suspend/resume If a CCP is unconfigured (e.g. there are no available queues) then there will be no data structures allocated for the device. Thus, we must check for validity of a pointer before trying to access structure members. 
Fixes: 720419f01832f ("crypto: ccp - Introduce the AMD Secure Processor device") Cc: Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-dev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c index f3ff36f93207..db99af89ef18 100644 --- a/drivers/crypto/ccp/ccp-dev.c +++ b/drivers/crypto/ccp/ccp-dev.c @@ -540,6 +540,10 @@ int ccp_dev_suspend(struct sp_device *sp, pm_message_t state) unsigned long flags; unsigned int i; + /* If there's no device there's nothing to do */ + if (!ccp) + return 0; + spin_lock_irqsave(&ccp->cmd_lock, flags); ccp->suspending = 1; @@ -564,6 +568,10 @@ int ccp_dev_resume(struct sp_device *sp) unsigned long flags; unsigned int i; + /* If there's no device there's nothing to do */ + if (!ccp) + return 0; + spin_lock_irqsave(&ccp->cmd_lock, flags); ccp->suspending = 0; -- cgit v1.2.1 From f9f0e9ed350e15d51ad07364b4cf910de50c472a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 20 Aug 2019 21:43:42 +0200 Subject: ALSA: usb-audio: Check mixer unit bitmap yet more strictly The bmControls (for UAC1) or bmMixerControls (for UAC2/3) bitmap has a variable size depending on both input and output pins. Its size is to fit with input * output bits. The problem is that the input size can't be determined simply from the unit descriptor itself but it needs to parse the whole connected sources. Although the uac_mixer_unit_get_channels() tries to check some possible overflow of this bitmap, it's incomplete due to the lack of the evaluation of input pins. For covering possible overflows, this patch adds the bitmap overflow check in the loop of input pins in parse_audio_mixer_unit(). 
Fixes: 0bfe5e434e66 ("ALSA: usb-audio: Check mixer unit descriptors more strictly") Cc: Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index b5927c3d5bc0..eceab19766db 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -739,7 +739,6 @@ static int uac_mixer_unit_get_channels(struct mixer_build *state, struct uac_mixer_unit_descriptor *desc) { int mu_channels; - void *c; if (desc->bLength < sizeof(*desc)) return -EINVAL; @@ -762,13 +761,6 @@ static int uac_mixer_unit_get_channels(struct mixer_build *state, break; } - if (!mu_channels) - return 0; - - c = uac_mixer_unit_bmControls(desc, state->mixer->protocol); - if (c - (void *)desc + (mu_channels - 1) / 8 >= desc->bLength) - return 0; /* no bmControls -> skip */ - return mu_channels; } @@ -2009,6 +2001,31 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid, * Mixer Unit */ +/* check whether the given in/out overflows bmMixerControls matrix */ +static bool mixer_bitmap_overflow(struct uac_mixer_unit_descriptor *desc, + int protocol, int num_ins, int num_outs) +{ + u8 *hdr = (u8 *)desc; + u8 *c = uac_mixer_unit_bmControls(desc, protocol); + size_t rest; /* remaining bytes after bmMixerControls */ + + switch (protocol) { + case UAC_VERSION_1: + default: + rest = 1; /* iMixer */ + break; + case UAC_VERSION_2: + rest = 2; /* bmControls + iMixer */ + break; + case UAC_VERSION_3: + rest = 6; /* bmControls + wMixerDescrStr */ + break; + } + + /* overflow? 
*/ + return c + (num_ins * num_outs + 7) / 8 + rest > hdr + hdr[0]; +} + /* * build a mixer unit control * @@ -2137,6 +2154,9 @@ static int parse_audio_mixer_unit(struct mixer_build *state, int unitid, if (err < 0) return err; num_ins += iterm.channels; + if (mixer_bitmap_overflow(desc, state->mixer->protocol, + num_ins, num_outs)) + break; for (; ich < num_ins; ich++) { int och, ich_has_controls = 0; -- cgit v1.2.1 From 5c498950f730aa17c5f8a2cdcb903524e4002ed2 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 19 Jul 2019 15:32:19 +0100 Subject: libceph: allow ceph_buffer_put() to receive a NULL ceph_buffer Signed-off-by: Luis Henriques Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/buffer.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/ceph/buffer.h b/include/linux/ceph/buffer.h index 5e58bb29b1a3..11cdc7c60480 100644 --- a/include/linux/ceph/buffer.h +++ b/include/linux/ceph/buffer.h @@ -30,7 +30,8 @@ static inline struct ceph_buffer *ceph_buffer_get(struct ceph_buffer *b) static inline void ceph_buffer_put(struct ceph_buffer *b) { - kref_put(&b->kref, ceph_buffer_release); + if (b) + kref_put(&b->kref, ceph_buffer_release); } extern int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end); -- cgit v1.2.1 From 86968ef21596515958d5f0a40233d02be78ecec0 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 19 Jul 2019 15:32:20 +0100 Subject: ceph: fix buffer free while holding i_ceph_lock in __ceph_setxattr() Calling ceph_buffer_put() in __ceph_setxattr() may end up freeing the i_xattrs.prealloc_blob buffer while holding the i_ceph_lock. This can be fixed by postponing the call until later, when the lock is released. The following backtrace was triggered by fstests generic/117. 
BUG: sleeping function called from invalid context at mm/vmalloc.c:2283 in_atomic(): 1, irqs_disabled(): 0, pid: 650, name: fsstress 3 locks held by fsstress/650: #0: 00000000870a0fe8 (sb_writers#8){.+.+}, at: mnt_want_write+0x20/0x50 #1: 00000000ba0c4c74 (&type->i_mutex_dir_key#6){++++}, at: vfs_setxattr+0x55/0xa0 #2: 000000008dfbb3f2 (&(&ci->i_ceph_lock)->rlock){+.+.}, at: __ceph_setxattr+0x297/0x810 CPU: 1 PID: 650 Comm: fsstress Not tainted 5.2.0+ #437 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x67/0x90 ___might_sleep.cold+0x9f/0xb1 vfree+0x4b/0x60 ceph_buffer_release+0x1b/0x60 __ceph_setxattr+0x2b4/0x810 __vfs_setxattr+0x66/0x80 __vfs_setxattr_noperm+0x59/0xf0 vfs_setxattr+0x81/0xa0 setxattr+0x115/0x230 ? filename_lookup+0xc9/0x140 ? rcu_read_lock_sched_held+0x74/0x80 ? rcu_sync_lockdep_assert+0x2e/0x60 ? __sb_start_write+0x142/0x1a0 ? mnt_want_write+0x20/0x50 path_setxattr+0xba/0xd0 __x64_sys_lsetxattr+0x24/0x30 do_syscall_64+0x50/0x1c0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7ff23514359a Signed-off-by: Luis Henriques Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/xattr.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 37b458a9af3a..c083557b3657 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -1036,6 +1036,7 @@ int __ceph_setxattr(struct inode *inode, const char *name, struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_cap_flush *prealloc_cf = NULL; + struct ceph_buffer *old_blob = NULL; int issued; int err; int dirty = 0; @@ -1109,13 +1110,15 @@ retry: struct ceph_buffer *blob; spin_unlock(&ci->i_ceph_lock); - dout(" preaallocating new blob size=%d\n", required_blob_size); + ceph_buffer_put(old_blob); /* Shouldn't be required */ + dout(" pre-allocating new blob size=%d\n", 
required_blob_size); blob = ceph_buffer_new(required_blob_size, GFP_NOFS); if (!blob) goto do_sync_unlocked; spin_lock(&ci->i_ceph_lock); + /* prealloc_blob can't be released while holding i_ceph_lock */ if (ci->i_xattrs.prealloc_blob) - ceph_buffer_put(ci->i_xattrs.prealloc_blob); + old_blob = ci->i_xattrs.prealloc_blob; ci->i_xattrs.prealloc_blob = blob; goto retry; } @@ -1131,6 +1134,7 @@ retry: } spin_unlock(&ci->i_ceph_lock); + ceph_buffer_put(old_blob); if (lock_snap_rwsem) up_read(&mdsc->snap_rwsem); if (dirty) -- cgit v1.2.1 From 12fe3dda7ed89c95cc0ef7abc001ad1ad3e092f8 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 19 Jul 2019 15:32:21 +0100 Subject: ceph: fix buffer free while holding i_ceph_lock in __ceph_build_xattrs_blob() Calling ceph_buffer_put() in __ceph_build_xattrs_blob() may result in freeing the i_xattrs.blob buffer while holding the i_ceph_lock. This can be fixed by having this function returning the old blob buffer and have the callers of this function freeing it when the lock is released. The following backtrace was triggered by fstests generic/117. BUG: sleeping function called from invalid context at mm/vmalloc.c:2283 in_atomic(): 1, irqs_disabled(): 0, pid: 649, name: fsstress 4 locks held by fsstress/649: #0: 00000000a7478e7e (&type->s_umount_key#19){++++}, at: iterate_supers+0x77/0xf0 #1: 00000000f8de1423 (&(&ci->i_ceph_lock)->rlock){+.+.}, at: ceph_check_caps+0x7b/0xc60 #2: 00000000562f2b27 (&s->s_mutex){+.+.}, at: ceph_check_caps+0x3bd/0xc60 #3: 00000000f83ce16a (&mdsc->snap_rwsem){++++}, at: ceph_check_caps+0x3ed/0xc60 CPU: 1 PID: 649 Comm: fsstress Not tainted 5.2.0+ #439 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x67/0x90 ___might_sleep.cold+0x9f/0xb1 vfree+0x4b/0x60 ceph_buffer_release+0x1b/0x60 __ceph_build_xattrs_blob+0x12b/0x170 __send_cap+0x302/0x540 ? __lock_acquire+0x23c/0x1e40 ? __mark_caps_flushing+0x15c/0x280 ? 
_raw_spin_unlock+0x24/0x30 ceph_check_caps+0x5f0/0xc60 ceph_flush_dirty_caps+0x7c/0x150 ? __ia32_sys_fdatasync+0x20/0x20 ceph_sync_fs+0x5a/0x130 iterate_supers+0x8f/0xf0 ksys_sync+0x4f/0xb0 __ia32_sys_sync+0xa/0x10 do_syscall_64+0x50/0x1c0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7fc6409ab617 Signed-off-by: Luis Henriques Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 5 ++++- fs/ceph/snap.c | 4 +++- fs/ceph/super.h | 2 +- fs/ceph/xattr.c | 11 ++++++++--- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index d98dcd976c80..ce0f5658720a 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1301,6 +1301,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, { struct ceph_inode_info *ci = cap->ci; struct inode *inode = &ci->vfs_inode; + struct ceph_buffer *old_blob = NULL; struct cap_msg_args arg; int held, revoking; int wake = 0; @@ -1365,7 +1366,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, ci->i_requested_max_size = arg.max_size; if (flushing & CEPH_CAP_XATTR_EXCL) { - __ceph_build_xattrs_blob(ci); + old_blob = __ceph_build_xattrs_blob(ci); arg.xattr_version = ci->i_xattrs.version; arg.xattr_buf = ci->i_xattrs.blob; } else { @@ -1409,6 +1410,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, spin_unlock(&ci->i_ceph_lock); + ceph_buffer_put(old_blob); + ret = send_cap_msg(&arg); if (ret < 0) { dout("error sending cap msg, must requeue %p\n", inode); diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 4c6494eb02b5..ccfcc66aaf44 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -465,6 +465,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) struct inode *inode = &ci->vfs_inode; struct ceph_cap_snap *capsnap; struct ceph_snap_context *old_snapc, *new_snapc; + struct ceph_buffer *old_blob = NULL; int used, dirty; capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS); @@ -541,7 +542,7 @@ void 
ceph_queue_cap_snap(struct ceph_inode_info *ci) capsnap->gid = inode->i_gid; if (dirty & CEPH_CAP_XATTR_EXCL) { - __ceph_build_xattrs_blob(ci); + old_blob = __ceph_build_xattrs_blob(ci); capsnap->xattr_blob = ceph_buffer_get(ci->i_xattrs.blob); capsnap->xattr_version = ci->i_xattrs.version; @@ -584,6 +585,7 @@ update_snapc: } spin_unlock(&ci->i_ceph_lock); + ceph_buffer_put(old_blob); kfree(capsnap); ceph_put_snap_context(old_snapc); } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index d2352fd95dbc..6b9f1ee7de85 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -926,7 +926,7 @@ extern int ceph_getattr(const struct path *path, struct kstat *stat, int __ceph_setxattr(struct inode *, const char *, const void *, size_t, int); ssize_t __ceph_getxattr(struct inode *, const char *, void *, size_t); extern ssize_t ceph_listxattr(struct dentry *, char *, size_t); -extern void __ceph_build_xattrs_blob(struct ceph_inode_info *ci); +extern struct ceph_buffer *__ceph_build_xattrs_blob(struct ceph_inode_info *ci); extern void __ceph_destroy_xattrs(struct ceph_inode_info *ci); extern const struct xattr_handler *ceph_xattr_handlers[]; diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index c083557b3657..939eab7aa219 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -754,12 +754,15 @@ static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size, /* * If there are dirty xattrs, reencode xattrs into the prealloc_blob - * and swap into place. + * and swap into place. It returns the old i_xattrs.blob (or NULL) so + * that it can be freed by the caller as the i_ceph_lock is likely to be + * held. 
*/ -void __ceph_build_xattrs_blob(struct ceph_inode_info *ci) +struct ceph_buffer *__ceph_build_xattrs_blob(struct ceph_inode_info *ci) { struct rb_node *p; struct ceph_inode_xattr *xattr = NULL; + struct ceph_buffer *old_blob = NULL; void *dest; dout("__build_xattrs_blob %p\n", &ci->vfs_inode); @@ -790,12 +793,14 @@ void __ceph_build_xattrs_blob(struct ceph_inode_info *ci) dest - ci->i_xattrs.prealloc_blob->vec.iov_base; if (ci->i_xattrs.blob) - ceph_buffer_put(ci->i_xattrs.blob); + old_blob = ci->i_xattrs.blob; ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob; ci->i_xattrs.prealloc_blob = NULL; ci->i_xattrs.dirty = false; ci->i_xattrs.version++; } + + return old_blob; } static inline int __get_request_mask(struct inode *in) { -- cgit v1.2.1 From af8a85a41734f37b67ba8ce69d56b685bee4ac48 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 19 Jul 2019 15:32:22 +0100 Subject: ceph: fix buffer free while holding i_ceph_lock in fill_inode() Calling ceph_buffer_put() in fill_inode() may result in freeing the i_xattrs.blob buffer while holding the i_ceph_lock. This can be fixed by postponing the call until later, when the lock is released. The following backtrace was triggered by fstests generic/070. 
BUG: sleeping function called from invalid context at mm/vmalloc.c:2283 in_atomic(): 1, irqs_disabled(): 0, pid: 3852, name: kworker/0:4 6 locks held by kworker/0:4/3852: #0: 000000004270f6bb ((wq_completion)ceph-msgr){+.+.}, at: process_one_work+0x1b8/0x5f0 #1: 00000000eb420803 ((work_completion)(&(&con->work)->work)){+.+.}, at: process_one_work+0x1b8/0x5f0 #2: 00000000be1c53a4 (&s->s_mutex){+.+.}, at: dispatch+0x288/0x1476 #3: 00000000559cb958 (&mdsc->snap_rwsem){++++}, at: dispatch+0x2eb/0x1476 #4: 000000000d5ebbae (&req->r_fill_mutex){+.+.}, at: dispatch+0x2fc/0x1476 #5: 00000000a83d0514 (&(&ci->i_ceph_lock)->rlock){+.+.}, at: fill_inode.isra.0+0xf8/0xf70 CPU: 0 PID: 3852 Comm: kworker/0:4 Not tainted 5.2.0+ #441 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58-prebuilt.qemu.org 04/01/2014 Workqueue: ceph-msgr ceph_con_workfn Call Trace: dump_stack+0x67/0x90 ___might_sleep.cold+0x9f/0xb1 vfree+0x4b/0x60 ceph_buffer_release+0x1b/0x60 fill_inode.isra.0+0xa9b/0xf70 ceph_fill_trace+0x13b/0xc70 ? dispatch+0x2eb/0x1476 dispatch+0x320/0x1476 ? __mutex_unlock_slowpath+0x4d/0x2a0 ceph_con_workfn+0xc97/0x2ec0 ? process_one_work+0x1b8/0x5f0 process_one_work+0x244/0x5f0 worker_thread+0x4d/0x3e0 kthread+0x105/0x140 ? process_one_work+0x5f0/0x5f0 ? 
kthread_park+0x90/0x90 ret_from_fork+0x3a/0x50 Signed-off-by: Luis Henriques Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 791f84a13bb8..18500edefc56 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -736,6 +736,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, int issued, new_issued, info_caps; struct timespec64 mtime, atime, ctime; struct ceph_buffer *xattr_blob = NULL; + struct ceph_buffer *old_blob = NULL; struct ceph_string *pool_ns = NULL; struct ceph_cap *new_cap = NULL; int err = 0; @@ -881,7 +882,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, if ((ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) && le64_to_cpu(info->xattr_version) > ci->i_xattrs.version) { if (ci->i_xattrs.blob) - ceph_buffer_put(ci->i_xattrs.blob); + old_blob = ci->i_xattrs.blob; ci->i_xattrs.blob = xattr_blob; if (xattr_blob) memcpy(ci->i_xattrs.blob->vec.iov_base, @@ -1022,8 +1023,8 @@ static int fill_inode(struct inode *inode, struct page *locked_page, out: if (new_cap) ceph_put_cap(mdsc, new_cap); - if (xattr_blob) - ceph_buffer_put(xattr_blob); + ceph_buffer_put(old_blob); + ceph_buffer_put(xattr_blob); ceph_put_string(pool_ns); return err; } -- cgit v1.2.1 From c95f1c5f436badb9bb87e9b30fd573f6b3d59423 Mon Sep 17 00:00:00 2001 From: Erqi Chen Date: Wed, 24 Jul 2019 10:26:09 +0800 Subject: ceph: clear page dirty before invalidate page clear_page_dirty_for_io(page) before mapping->a_ops->invalidatepage(). invalidatepage() clears page's private flag, if dirty flag is not cleared, the page may cause BUG_ON failure in ceph_set_page_dirty(). 
Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/40862 Signed-off-by: Erqi Chen Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index e078cc55b989..b3c8b886bf64 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -913,8 +913,9 @@ get_more_pages: if (page_offset(page) >= ceph_wbc.i_size) { dout("%p page eof %llu\n", page, ceph_wbc.i_size); - if (ceph_wbc.size_stable || - page_offset(page) >= i_size_read(inode)) + if ((ceph_wbc.size_stable || + page_offset(page) >= i_size_read(inode)) && + clear_page_dirty_for_io(page)) mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); unlock_page(page); -- cgit v1.2.1 From 28a282616f56990547b9dcd5c6fbd2001344664c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 15 Aug 2019 06:23:38 -0400 Subject: ceph: don't try fill file_lock on unsuccessful GETFILELOCK reply When ceph_mdsc_do_request returns an error, we can't assume that the filelock_reply pointer will be set. Only try to fetch fields out of the r_reply_info when it returns success. 
Cc: stable@vger.kernel.org Reported-by: Hector Martin Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/locks.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index ac9b53b89365..5083e238ad15 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -111,8 +111,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, req->r_wait_for_completion = ceph_lock_wait_for_completion; err = ceph_mdsc_do_request(mdsc, inode, req); - - if (operation == CEPH_MDS_OP_GETFILELOCK) { + if (!err && operation == CEPH_MDS_OP_GETFILELOCK) { fl->fl_pid = -le64_to_cpu(req->r_reply_info.filelock_reply->pid); if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) fl->fl_type = F_RDLCK; -- cgit v1.2.1 From a561372405cf6bc6f14239b3a9e57bb39f2788b0 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 20 Aug 2019 16:40:33 +0200 Subject: libceph: fix PG split vs OSD (re)connect race We can't rely on ->peer_features in calc_target() because it may be called both when the OSD session is established and open and when it's not. ->peer_features is not valid unless the OSD session is open. If this happens on a PG split (pg_num increase), that could mean we don't resend a request that should have been resent, hanging the client indefinitely. In userspace this was fixed by looking at require_osd_release and get_xinfo[osd].features fields of the osdmap. However these fields belong to the OSD section of the osdmap, which the kernel doesn't decode (only the client section is decoded). Instead, let's drop this feature check. It effectively checks for luminous, so only pre-luminous OSDs would be affected in that on a PG split the kernel might resend a request that should not have been resent. Duplicates can occur in other scenarios, so both sides should already be prepared for them: see dup/replay logic on the OSD side and retry_attempt check on the client side. 
Cc: stable@vger.kernel.org Fixes: 7de030d6b10a ("libceph: resend on PG splits if OSD has RESEND_ON_SPLIT") Link: https://tracker.ceph.com/issues/41162 Reported-by: Jerry Lee Signed-off-by: Ilya Dryomov Tested-by: Jerry Lee Reviewed-by: Jeff Layton --- net/ceph/osd_client.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 0b2df09b2554..78ae6e8c953d 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1496,7 +1496,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, struct ceph_osds up, acting; bool force_resend = false; bool unpaused = false; - bool legacy_change; + bool legacy_change = false; bool split = false; bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE); bool recovery_deletes = ceph_osdmap_flag(osdc, @@ -1584,15 +1584,14 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, t->osd = acting.primary; } - if (unpaused || legacy_change || force_resend || - (split && con && CEPH_HAVE_FEATURE(con->peer_features, - RESEND_ON_SPLIT))) + if (unpaused || legacy_change || force_resend || split) ct_res = CALC_TARGET_NEED_RESEND; else ct_res = CALC_TARGET_NO_ACTION; out: - dout("%s t %p -> ct_res %d osd %d\n", __func__, t, ct_res, t->osd); + dout("%s t %p -> %d%d%d%d ct_res %d osd%d\n", __func__, t, unpaused, + legacy_change, force_resend, split, ct_res, t->osd); return ct_res; } -- cgit v1.2.1 From 7871aa60ae0086fe4626abdf5ed13eeddf306c61 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Thu, 8 Aug 2019 08:35:40 +0000 Subject: mmc: sdhci-of-at91: add quirk for broken HS200 HS200 is not implemented in the driver, but the controller claims it through caps. Remove it via a quirk, to make sure the mmc core does not try to enable HS200, as it causes the eMMC initialization to fail.
Signed-off-by: Eugen Hristev Acked-by: Ludovic Desroches Acked-by: Adrian Hunter Fixes: bb5f8ea4d514 ("mmc: sdhci-of-at91: introduce driver for the Atmel SDMMC") Cc: stable@vger.kernel.org # v4.4+ Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-at91.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index d4e7e8b7be77..e7d1920729fb 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -357,6 +357,9 @@ static int sdhci_at91_probe(struct platform_device *pdev) pm_runtime_set_autosuspend_delay(&pdev->dev, 50); pm_runtime_use_autosuspend(&pdev->dev); + /* HS200 is broken at this moment */ + host->quirks2 = SDHCI_QUIRK2_BROKEN_HS200; + ret = sdhci_add_host(host); if (ret) goto pm_runtime_disable; -- cgit v1.2.1 From 2113c5f62b7423e4a72b890bd479704aa85c81ba Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 22 Aug 2019 13:03:05 +0200 Subject: KVM: arm/arm64: Only skip MMIO insn once If after an MMIO exit to userspace a VCPU is immediately run with an immediate_exit request, such as when a signal is delivered or an MMIO emulation completion is needed, then the VCPU completes the MMIO emulation and immediately returns to userspace. As the exit_reason does not get changed from KVM_EXIT_MMIO in these cases we have to be careful not to complete the MMIO emulation again, when the VCPU is eventually run again, because the emulation does an instruction skip (and doing too many skips would be a waste of guest code :-) We need to use additional VCPU state to track if the emulation is complete. As luck would have it, we already have 'mmio_needed', which even appears to be used in this way by other architectures already. 
Fixes: 0d640732dbeb ("arm64: KVM: Skip MMIO insn after emulation") Acked-by: Mark Rutland Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmio.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c index a8a6a0c883f1..6af5c91337f2 100644 --- a/virt/kvm/arm/mmio.c +++ b/virt/kvm/arm/mmio.c @@ -86,6 +86,12 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) unsigned int len; int mask; + /* Detect an already handled MMIO return */ + if (unlikely(!vcpu->mmio_needed)) + return 0; + + vcpu->mmio_needed = 0; + if (!run->mmio.is_write) { len = run->mmio.len; if (len > sizeof(unsigned long)) @@ -188,6 +194,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, run->mmio.is_write = is_write; run->mmio.phys_addr = fault_ipa; run->mmio.len = len; + vcpu->mmio_needed = 1; if (!ret) { /* We handled the access successfully in the kernel. */ -- cgit v1.2.1 From a5fb8e6c02d6a518fb2b1a2b8c2471fa77b69436 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Aug 2019 13:28:43 +0100 Subject: afs: Fix leak in afs_lookup_cell_rcu() Fix a leak on the cell refcount in afs_lookup_cell_rcu() due to non-clearance of the default error in the case a NULL cell name is passed and the workstation default cell is used. Also put a bit at the end to make sure we don't leak a cell ref if we're going to be returning an error. This leak results in an assertion like the following when the kafs module is unloaded: AFS: Assertion failed 2 == 1 is false 0x2 == 0x1 is false ------------[ cut here ]------------ kernel BUG at fs/afs/cell.c:770! ... RIP: 0010:afs_manage_cells+0x220/0x42f [kafs] ... process_one_work+0x4c2/0x82c ? pool_mayday_timeout+0x1e1/0x1e1 ? do_raw_spin_lock+0x134/0x175 worker_thread+0x336/0x4a6 ? rescuer_thread+0x4af/0x4af kthread+0x1de/0x1ee ? 
kthread_park+0xd4/0xd4 ret_from_fork+0x24/0x30 Fixes: 989782dcdc91 ("afs: Overhaul cell database management") Signed-off-by: David Howells --- fs/afs/cell.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index a2a87117d262..fd5133e26a38 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -74,6 +74,7 @@ struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net, cell = rcu_dereference_raw(net->ws_cell); if (cell) { afs_get_cell(cell); + ret = 0; break; } ret = -EDESTADDRREQ; @@ -108,6 +109,9 @@ struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net, done_seqretry(&net->cells_lock, seq); + if (ret != 0 && cell) + afs_put_cell(net, cell); + return ret == 0 ? cell : ERR_PTR(ret); } -- cgit v1.2.1 From c4c613ff08d92e72bf64a65ec35a2c3aa1cfcd06 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 22 Aug 2019 13:28:43 +0100 Subject: afs: Fix possible oops in afs_lookup trace event The afs_lookup trace event can cause the following: [ 216.576777] BUG: kernel NULL pointer dereference, address: 000000000000023b [ 216.576803] #PF: supervisor read access in kernel mode [ 216.576813] #PF: error_code(0x0000) - not-present page ... [ 216.576913] RIP: 0010:trace_event_raw_event_afs_lookup+0x9e/0x1c0 [kafs] If the inode from afs_do_lookup() is an error other than ENOENT, or if it is ENOENT and afs_try_auto_mntpt() returns an error, the trace event will try to dereference the error pointer as a valid pointer. Use IS_ERR_OR_NULL to only pass a valid pointer for the trace, or NULL. Ideally the trace would include the error value, but for now just avoid the oops. 
Fixes: 80548b03991f ("afs: Add more tracepoints") Signed-off-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 81207dc3c997..139b4e3cc946 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -959,7 +959,8 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, inode ? AFS_FS_I(inode) : NULL); } else { trace_afs_lookup(dvnode, &dentry->d_name, - inode ? AFS_FS_I(inode) : NULL); + IS_ERR_OR_NULL(inode) ? NULL + : AFS_FS_I(inode)); } return d; } -- cgit v1.2.1 From 7533be858f5b9a036b9f91556a3ed70786abca8e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 19 Aug 2019 16:05:31 +0100 Subject: afs: use correct afs_call_type in yfs_fs_store_opaque_acl2 It seems that 'yfs_RXYFSStoreOpaqueACL2' should be used in yfs_fs_store_opaque_acl2(). Fixes: f5e4546347bc ("afs: Implement YFS ACL setting") Signed-off-by: YueHaibing Signed-off-by: David Howells --- fs/afs/yfsclient.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index 2575503170fc..ca2452806ebf 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -2171,7 +2171,7 @@ int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *fc, const struct afs_acl *acl key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); size = round_up(acl->size, 4); - call = afs_alloc_flat_call(net, &yfs_RXYFSStoreStatus, + call = afs_alloc_flat_call(net, &yfs_RXYFSStoreOpaqueACL2, sizeof(__be32) * 2 + sizeof(struct yfs_xdr_YFSFid) + sizeof(__be32) + size, -- cgit v1.2.1 From d37b1e534071ab1983e7c85273234b132c77591a Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 22 Aug 2019 03:02:50 -0700 Subject: RDMA/bnxt_re: Fix stack-out-of-bounds in bnxt_qplib_rcfw_send_message Driver copies FW commands to the HW queue as units of 16 bytes. Some of the command structures are not an exact multiple of 16.
So while copying the data from those structures, the stack out of bounds messages are reported by KASAN. The following error is reported. [ 1337.530155] ================================================================== [ 1337.530277] BUG: KASAN: stack-out-of-bounds in bnxt_qplib_rcfw_send_message+0x40a/0x850 [bnxt_re] [ 1337.530413] Read of size 16 at addr ffff888725477a48 by task rmmod/2785 [ 1337.530540] CPU: 5 PID: 2785 Comm: rmmod Tainted: G OE 5.2.0-rc6+ #75 [ 1337.530541] Hardware name: Dell Inc. PowerEdge R730/0599V5, BIOS 1.0.4 08/28/2014 [ 1337.530542] Call Trace: [ 1337.530548] dump_stack+0x5b/0x90 [ 1337.530556] ? bnxt_qplib_rcfw_send_message+0x40a/0x850 [bnxt_re] [ 1337.530560] print_address_description+0x65/0x22e [ 1337.530568] ? bnxt_qplib_rcfw_send_message+0x40a/0x850 [bnxt_re] [ 1337.530575] ? bnxt_qplib_rcfw_send_message+0x40a/0x850 [bnxt_re] [ 1337.530577] __kasan_report.cold.3+0x37/0x77 [ 1337.530581] ? _raw_write_trylock+0x10/0xe0 [ 1337.530588] ? bnxt_qplib_rcfw_send_message+0x40a/0x850 [bnxt_re] [ 1337.530590] kasan_report+0xe/0x20 [ 1337.530592] memcpy+0x1f/0x50 [ 1337.530600] bnxt_qplib_rcfw_send_message+0x40a/0x850 [bnxt_re] [ 1337.530608] ? bnxt_qplib_creq_irq+0xa0/0xa0 [bnxt_re] [ 1337.530611] ? xas_create+0x3aa/0x5f0 [ 1337.530613] ? xas_start+0x77/0x110 [ 1337.530615] ? xas_clear_mark+0x34/0xd0 [ 1337.530623] bnxt_qplib_free_mrw+0x104/0x1a0 [bnxt_re] [ 1337.530631] ? bnxt_qplib_destroy_ah+0x110/0x110 [bnxt_re] [ 1337.530633] ? bit_wait_io_timeout+0xc0/0xc0 [ 1337.530641] bnxt_re_dealloc_mw+0x2c/0x60 [bnxt_re] [ 1337.530648] bnxt_re_destroy_fence_mr+0x77/0x1d0 [bnxt_re] [ 1337.530655] bnxt_re_dealloc_pd+0x25/0x60 [bnxt_re] [ 1337.530677] ib_dealloc_pd_user+0xbe/0xe0 [ib_core] [ 1337.530683] srpt_remove_one+0x5de/0x690 [ib_srpt] [ 1337.530689] ? __srpt_close_all_ch+0xc0/0xc0 [ib_srpt] [ 1337.530692] ? xa_load+0x87/0xe0 ... 
[ 1337.530840] do_syscall_64+0x6d/0x1f0 [ 1337.530843] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 1337.530845] RIP: 0033:0x7ff5b389035b [ 1337.530848] Code: 73 01 c3 48 8b 0d 2d 0b 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d fd 0a 2c 00 f7 d8 64 89 01 48 [ 1337.530849] RSP: 002b:00007fff83425c28 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 [ 1337.530852] RAX: ffffffffffffffda RBX: 00005596443e6750 RCX: 00007ff5b389035b [ 1337.530853] RDX: 000000000000000a RSI: 0000000000000800 RDI: 00005596443e67b8 [ 1337.530854] RBP: 0000000000000000 R08: 00007fff83424ba1 R09: 0000000000000000 [ 1337.530856] R10: 00007ff5b3902960 R11: 0000000000000206 R12: 00007fff83425e50 [ 1337.530857] R13: 00007fff8342673c R14: 00005596443e6260 R15: 00005596443e6750 [ 1337.530885] The buggy address belongs to the page: [ 1337.530962] page:ffffea001c951dc0 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 [ 1337.530964] flags: 0x57ffffc0000000() [ 1337.530967] raw: 0057ffffc0000000 0000000000000000 ffffffff1c950101 0000000000000000 [ 1337.530970] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 1337.530970] page dumped because: kasan: bad access detected [ 1337.530996] Memory state around the buggy address: [ 1337.531072] ffff888725477900: 00 00 00 00 f1 f1 f1 f1 00 00 00 00 00 f2 f2 f2 [ 1337.531180] ffff888725477980: 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 [ 1337.531288] >ffff888725477a00: 00 f2 f2 f2 f2 f2 f2 00 00 00 f2 00 00 00 00 00 [ 1337.531393] ^ [ 1337.531478] ffff888725477a80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 1337.531585] ffff888725477b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 1337.531691] ================================================================== Fix this by passing the exact size of each FW command to bnxt_qplib_rcfw_send_message as req->cmd_size. 
Before sending the command to HW, modify the req->cmd_size to number of 16 byte units. Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1566468170-489-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 8 +++++++- drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 11 ++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 48b04d2f175f..60c8f76aab33 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -136,6 +136,13 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, spin_unlock_irqrestore(&cmdq->lock, flags); return -EBUSY; } + + size = req->cmd_size; + /* change the cmd_size to the number of 16byte cmdq unit. + * req->cmd_size is modified here + */ + bnxt_qplib_set_cmd_slots(req); + memset(resp, 0, sizeof(*resp)); crsqe->resp = (struct creq_qp_event *)resp; crsqe->resp->cookie = req->cookie; @@ -150,7 +157,6 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, cmdq_ptr = (struct bnxt_qplib_cmdqe **)cmdq->pbl_ptr; preq = (u8 *)req; - size = req->cmd_size * BNXT_QPLIB_CMDQE_UNITS; do { /* Locate the next cmdq slot */ sw_prod = HWQ_CMP(cmdq->prod, cmdq); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 2138533bb642..dfeadc192e17 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -55,9 +55,7 @@ do { \ memset(&(req), 0, sizeof((req))); \ (req).opcode = CMDQ_BASE_OPCODE_##CMD; \ - (req).cmd_size = (sizeof((req)) + \ - BNXT_QPLIB_CMDQE_UNITS - 1) / \ - BNXT_QPLIB_CMDQE_UNITS; \ + (req).cmd_size = sizeof((req)); \ (req).flags = cpu_to_le16(cmd_flags); \ } while (0) @@ -95,6 +93,13 @@ static inline u32 
bnxt_qplib_cmdqe_cnt_per_pg(u32 depth) BNXT_QPLIB_CMDQE_UNITS); } +/* Set the cmd_size to a factor of CMDQE unit */ +static inline void bnxt_qplib_set_cmd_slots(struct cmdq_base *req) +{ + req->cmd_size = (req->cmd_size + BNXT_QPLIB_CMDQE_UNITS - 1) / + BNXT_QPLIB_CMDQE_UNITS; +} + #define MAX_CMDQ_IDX(depth) ((depth) - 1) static inline u32 bnxt_qplib_max_cmdq_idx_per_pg(u32 depth) -- cgit v1.2.1 From fab4f97e1fe33cf08e58c09cf9eee334857d9fe7 Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Thu, 22 Aug 2019 17:07:41 +0200 Subject: RDMA/siw: Fix SGL mapping issues All user level and most in-kernel applications submit WQEs where the SG list entries are all of a single type. iSER in particular, however, will send us WQEs with mixed SG types: sge[0] = kernel buffer, sge[1] = PBL region. Check and set is_kva on each SG entry individually instead of assuming the first SGE type carries through to the last. This fixes iSER over siw. Fixes: b9be6f18cf9e ("rdma/siw: transmit path") Reported-by: Krishnamraju Eraparaju Tested-by: Krishnamraju Eraparaju Signed-off-by: Bernard Metzler Link: https://lore.kernel.org/r/20190822150741.21871-1-bmt@zurich.ibm.com Signed-off-by: Doug Ledford --- drivers/infiniband/sw/siw/siw_qp_tx.c | 37 ++++++++++++++--------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index 43020d2040fc..42c63622c7bd 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -398,15 +398,13 @@ static int siw_0copy_tx(struct socket *s, struct page **page, #define MAX_TRAILER (MPA_CRC_SIZE + 4) -static void siw_unmap_pages(struct page **pages, int hdr_len, int num_maps) +static void siw_unmap_pages(struct page **pp, unsigned long kmap_mask) { - if (hdr_len) { - ++pages; - --num_maps; - } - while (num_maps-- > 0) { - kunmap(*pages); - pages++; + while (kmap_mask) { + if (kmap_mask & BIT(0)) + kunmap(*pp); + pp++; + 
kmap_mask >>= 1; } } @@ -437,6 +435,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) unsigned int data_len = c_tx->bytes_unsent, hdr_len = 0, trl_len = 0, sge_off = c_tx->sge_off, sge_idx = c_tx->sge_idx, pbl_idx = c_tx->pbl_idx; + unsigned long kmap_mask = 0L; if (c_tx->state == SIW_SEND_HDR) { if (c_tx->use_sendpage) { @@ -463,8 +462,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) if (!(tx_flags(wqe) & SIW_WQE_INLINE)) { mem = wqe->mem[sge_idx]; - if (!mem->mem_obj) - is_kva = 1; + is_kva = mem->mem_obj == NULL ? 1 : 0; } else { is_kva = 1; } @@ -500,12 +498,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) p = siw_get_upage(mem->umem, sge->laddr + sge_off); if (unlikely(!p)) { - if (hdr_len) - seg--; - if (!c_tx->use_sendpage && seg) { - siw_unmap_pages(page_array, - hdr_len, seg); - } + siw_unmap_pages(page_array, kmap_mask); wqe->processed -= c_tx->bytes_unsent; rv = -EFAULT; goto done_crc; @@ -515,6 +508,10 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) if (!c_tx->use_sendpage) { iov[seg].iov_base = kmap(p) + fp_off; iov[seg].iov_len = plen; + + /* Remember for later kunmap() */ + kmap_mask |= BIT(seg); + if (do_crc) crypto_shash_update( c_tx->mpa_crc_hd, @@ -543,10 +540,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) if (++seg > (int)MAX_ARRAY) { siw_dbg_qp(tx_qp(c_tx), "to many fragments\n"); - if (!is_kva && !c_tx->use_sendpage) { - siw_unmap_pages(page_array, hdr_len, - seg - 1); - } + siw_unmap_pages(page_array, kmap_mask); wqe->processed -= c_tx->bytes_unsent; rv = -EMSGSIZE; goto done_crc; @@ -597,8 +591,7 @@ sge_done: } else { rv = kernel_sendmsg(s, &msg, iov, seg + 1, hdr_len + data_len + trl_len); - if (!is_kva) - siw_unmap_pages(page_array, hdr_len, seg); + siw_unmap_pages(page_array, kmap_mask); } if (rv < (int)hdr_len) { /* Not even complete hdr pushed or negative rv */ -- cgit v1.2.1 From 7542c6dedbc1caa284ca4cbd6b64f99023ff1b97 
Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 18 Jun 2019 12:09:26 +0900 Subject: jffs2: Remove C++ style comments from uapi header Linux kernel tolerates C++ style comments these days. Actually, the SPDX License tags for .c files start with //. On the other hand, uapi headers are written in more strict C, where the C++ comment style is forbidden. I simply dropped these lines instead of fixing the comment style. This code has been always commented out since it was added around Linux 2.4.9 (i.e. commented out for more than 17 years). 'Maybe later...' will never happen. Signed-off-by: Masahiro Yamada Acked-by: Richard Weinberger Signed-off-by: Richard Weinberger --- include/uapi/linux/jffs2.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/uapi/linux/jffs2.h b/include/uapi/linux/jffs2.h index a18b719f49d4..784ba0b9690a 100644 --- a/include/uapi/linux/jffs2.h +++ b/include/uapi/linux/jffs2.h @@ -77,11 +77,6 @@ #define JFFS2_ACL_VERSION 0x0001 -// Maybe later... -//#define JFFS2_NODETYPE_CHECKPOINT (JFFS2_FEATURE_RWCOMPAT_DELETE | JFFS2_NODE_ACCURATE | 3) -//#define JFFS2_NODETYPE_OPTIONS (JFFS2_FEATURE_RWCOMPAT_COPY | JFFS2_NODE_ACCURATE | 4) - - #define JFFS2_INO_FLAG_PREREAD 1 /* Do read_inode() for this one at mount time, don't wait for it to happen later */ -- cgit v1.2.1 From 4dd75b335bc1f10fb1a01b5cd58870d47c13c4e7 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 13 Aug 2019 23:50:51 +0200 Subject: ubifs: Fix double unlock around orphan_delete() We unlock after orphan_delete(), so no need to unlock in the function too. 
Reported-by: Han Xu Fixes: 8009ce956c3d ("ubifs: Don't leak orphans on memory during commit") Signed-off-by: Richard Weinberger --- fs/ubifs/orphan.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index b52624e28fa1..3b4b4114f208 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -129,7 +129,6 @@ static void __orphan_drop(struct ubifs_info *c, struct ubifs_orphan *o) static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph) { if (orph->del) { - spin_unlock(&c->orphan_lock); dbg_gen("deleted twice ino %lu", orph->inum); return; } @@ -138,7 +137,6 @@ static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph) orph->del = 1; orph->dnext = c->orph_dnext; c->orph_dnext = orph; - spin_unlock(&c->orphan_lock); dbg_gen("delete later ino %lu", orph->inum); return; } -- cgit v1.2.1 From 377e208f44784174f3002d9892d553715a3ab71b Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 13 Aug 2019 23:55:48 +0200 Subject: ubifs: Correctly initialize c->min_log_bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently on a freshly mounted UBIFS, c->min_log_bytes is 0. This can lead to a log overrun and make commits fail. Recent kernels will report the following assert: UBIFS assert failed: c->lhead_lnum != c->ltail_lnum, in fs/ubifs/log.c:412 c->min_log_bytes can have two states, 0 and c->leb_size. It controls how many bytes of the log area are reserved for non-bud nodes such as commit nodes. After a commit it has to be set to c->leb_size such that we always have enough space for a commit. While a commit runs it can be 0 to make the remaining bytes of the log available to writers. Having it set to 0 right after mount is wrong since no space for commits is reserved.
Fixes: 1e51764a3c2ac ("UBIFS: add new flash file system") Reported-and-tested-by: Uwe Kleine-König Signed-off-by: Richard Weinberger --- fs/ubifs/super.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 2c0803b0ac3a..8c1d571334bc 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -609,6 +609,10 @@ static int init_constants_early(struct ubifs_info *c) c->max_bu_buf_len = UBIFS_MAX_BULK_READ * UBIFS_MAX_DATA_NODE_SZ; if (c->max_bu_buf_len > c->leb_size) c->max_bu_buf_len = c->leb_size; + + /* Log is ready, preserve one LEB for commits. */ + c->min_log_bytes = c->leb_size; + return 0; } -- cgit v1.2.1 From 0af83abbd4a6e36a4b209d8c57c26143e40eeec1 Mon Sep 17 00:00:00 2001 From: Liu Song Date: Tue, 6 Aug 2019 22:21:40 +0800 Subject: ubifs: Limit the number of pages in shrink_liability If the number of dirty pages to be written back is large, then writeback_inodes_sb will block waiting for a long time, causing hung task detection alarm. Therefore, we should limit the maximum number of pages written back this time, which let the budget be completed faster. The remaining dirty pages tend to rely on the writeback mechanism to complete the synchronization. 
Fixes: b6e51316daed ("writeback: separate starting of sync vs opportunistic writeback") Signed-off-by: Liu Song Signed-off-by: Richard Weinberger --- fs/ubifs/budget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 80d7301ab76d..c0b84e960b20 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -51,7 +51,7 @@ static void shrink_liability(struct ubifs_info *c, int nr_to_write) { down_read(&c->vfs_sb->s_umount); - writeback_inodes_sb(c->vfs_sb, WB_REASON_FS_FREE_SPACE); + writeback_inodes_sb_nr(c->vfs_sb, nr_to_write, WB_REASON_FS_FREE_SPACE); up_read(&c->vfs_sb->s_umount); } -- cgit v1.2.1 From e4f9d6013820d1eba1432d51dd1c5795759aa77f Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Sat, 17 Aug 2019 13:32:40 +0800 Subject: dm btree: fix order of block initialization in btree_split_beneath When btree_split_beneath() splits a node to two new children, it will allocate two blocks: left and right. If right block's allocation failed, the left block will be unlocked and marked dirty. If this happened, the left block's content is zero, because it wasn't initialized with the btree struct before the attempt to allocate the right block. Upon return, when flushing the left block to disk, the validator will fail when checking this block. Then a BUG_ON is raised. Fix this by completely initializing the left block before allocating and initializing the right block.
Fixes: 4dcb8b57df359 ("dm btree: fix leak of bufio-backed block in btree_split_beneath error path") Cc: stable@vger.kernel.org Signed-off-by: ZhangXiaoxu Signed-off-by: Mike Snitzer --- drivers/md/persistent-data/dm-btree.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 58b319757b1e..8aae0624a297 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -628,39 +628,40 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) new_parent = shadow_current(s); + pn = dm_block_data(new_parent); + size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ? + sizeof(__le64) : s->info->value_type.size; + + /* create & init the left block */ r = new_block(s->info, &left); if (r < 0) return r; + ln = dm_block_data(left); + nr_left = le32_to_cpu(pn->header.nr_entries) / 2; + + ln->header.flags = pn->header.flags; + ln->header.nr_entries = cpu_to_le32(nr_left); + ln->header.max_entries = pn->header.max_entries; + ln->header.value_size = pn->header.value_size; + memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0])); + memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size); + + /* create & init the right block */ r = new_block(s->info, &right); if (r < 0) { unlock_block(s->info, left); return r; } - pn = dm_block_data(new_parent); - ln = dm_block_data(left); rn = dm_block_data(right); - - nr_left = le32_to_cpu(pn->header.nr_entries) / 2; nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left; - ln->header.flags = pn->header.flags; - ln->header.nr_entries = cpu_to_le32(nr_left); - ln->header.max_entries = pn->header.max_entries; - ln->header.value_size = pn->header.value_size; - rn->header.flags = pn->header.flags; rn->header.nr_entries = cpu_to_le32(nr_right); rn->header.max_entries = pn->header.max_entries; rn->header.value_size = pn->header.value_size; - - memcpy(ln->keys, pn->keys, 
nr_left * sizeof(pn->keys[0])); memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0])); - - size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ? - sizeof(__le64) : s->info->value_type.size; - memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size); memcpy(value_ptr(rn, 0), value_ptr(pn, nr_left), nr_right * size); -- cgit v1.2.1 From ae148243d3f0816b37477106c05a2ec7d5f32614 Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Mon, 19 Aug 2019 11:31:21 +0800 Subject: dm space map metadata: fix missing store of apply_bops() return value In commit 6096d91af0b6 ("dm space map metadata: fix occasional leak of a metadata block on resize"), we refactor the commit logic to a new function 'apply_bops'. But when that logic was replaced in out() the return value was not stored. This may lead out() returning a wrong value to the caller. Fixes: 6096d91af0b6 ("dm space map metadata: fix occasional leak of a metadata block on resize") Cc: stable@vger.kernel.org Signed-off-by: ZhangXiaoxu Signed-off-by: Mike Snitzer --- drivers/md/persistent-data/dm-space-map-metadata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index aec449243966..25328582cc48 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -249,7 +249,7 @@ static int out(struct sm_metadata *smm) } if (smm->recursion_count == 1) - apply_bops(smm); + r = apply_bops(smm); smm->recursion_count--; -- cgit v1.2.1 From 8465df4025dd4ab84fc24dad6a91cc2b9ec1604d Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Sun, 14 Jul 2019 14:06:40 +0300 Subject: net/mlx5: Fix crdump chunks print Crdump repeats itself every chunk of 256bytes. That is due to bug of missing progressing offset while copying the data from buffer to devlink_fmsg. 
Fixes: 9b1f29823605 ("net/mlx5: Add support for FW fatal reporter dump") Signed-off-by: Moshe Shemesh Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 9314777d99e3..cc5887f52679 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -590,7 +590,8 @@ mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter, data_size = crdump_size - offset; else data_size = MLX5_CR_DUMP_CHUNK_SIZE; - err = devlink_fmsg_binary_put(fmsg, cr_data, data_size); + err = devlink_fmsg_binary_put(fmsg, (char *)cr_data + offset, + data_size); if (err) goto free_data; } -- cgit v1.2.1 From a6633e11e8732b9c000774746a2c1827a7e3c316 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Tue, 13 Aug 2019 12:49:13 +0300 Subject: net/mlx5: Fix delay in fw fatal report handling due to fw report When fw fatal error occurs, poll health() first detects and reports on a fw error. Afterwards, it detects and reports on the fw fatal error itself. That can cause a long delay in fw fatal error handling which waits in a queue for the fw error handling to be finished. The fw error handle will try asking for fw core dump command while fw in fatal state may not respond and driver will wait for command timeout. Changing the flow to detect and handle first fw fatal errors and only if no fatal error detected look for a fw error to handle. 
Fixes: d1bf0e2cc4a6 ("net/mlx5: Report devlink health on FW issues") Signed-off-by: Moshe Shemesh Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index cc5887f52679..d685122d9ff7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -701,6 +701,16 @@ static void poll_health(struct timer_list *t) if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) goto out; + fatal_error = check_fatal_sensors(dev); + + if (fatal_error && !health->fatal_error) { + mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error); + dev->priv.health.fatal_error = fatal_error; + print_health_info(dev); + mlx5_trigger_health_work(dev); + goto out; + } + count = ioread32be(health->health_counter); if (count == health->prev) ++health->miss_counter; @@ -719,15 +729,6 @@ static void poll_health(struct timer_list *t) if (health->synd && health->synd != prev_synd) queue_work(health->wq, &health->report_work); - fatal_error = check_fatal_sensors(dev); - - if (fatal_error && !health->fatal_error) { - mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error); - dev->priv.health.fatal_error = fatal_error; - print_health_info(dev); - mlx5_trigger_health_work(dev); - } - out: mod_timer(&health->timer, get_next_poll_jiffies()); } -- cgit v1.2.1 From 5c6f40c61777e059ac3692c4505dff5eb880a12d Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 22 Aug 2019 15:03:27 +0300 Subject: net/mlx5e: Add num bytes metadata to WQE info For TLS WQEs, metadata info did not include num_bytes. Due to this issue, tx_tls_dump_bytes counter did not increment. Modify tx_fill_wi() to fill num bytes. When it is called for non-traffic WQE, zero is expected. 
Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 8b93101e1a09..0681735ea398 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -109,13 +109,15 @@ build_progress_params(struct mlx5e_tx_wqe *wqe, u16 pc, u32 sqn, static void tx_fill_wi(struct mlx5e_txqsq *sq, u16 pi, u8 num_wqebbs, - skb_frag_t *resync_dump_frag) + skb_frag_t *resync_dump_frag, + u32 num_bytes) { struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi]; wi->skb = NULL; wi->num_wqebbs = num_wqebbs; wi->resync_dump_frag = resync_dump_frag; + wi->num_bytes = num_bytes; } void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx) @@ -143,7 +145,7 @@ post_static_params(struct mlx5e_txqsq *sq, umr_wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_STATIC_UMR_WQE_SZ, &pi); build_static_params(umr_wqe, sq->pc, sq->sqn, priv_tx, fence); - tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, NULL); + tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, NULL, 0); sq->pc += MLX5E_KTLS_STATIC_WQEBBS; } @@ -157,7 +159,7 @@ post_progress_params(struct mlx5e_txqsq *sq, wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_PROGRESS_WQE_SZ, &pi); build_progress_params(wqe, sq->pc, sq->sqn, priv_tx, fence); - tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, NULL); + tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, NULL, 0); sq->pc += MLX5E_KTLS_PROGRESS_WQEBBS; } @@ -296,7 +298,7 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, struct sk_buff *skb, dseg->byte_count = cpu_to_be32(fsz); mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); - tx_fill_wi(sq, pi, num_wqebbs, frag); + tx_fill_wi(sq, pi, num_wqebbs, frag, 
fsz); sq->pc += num_wqebbs; WARN(num_wqebbs > MLX5E_KTLS_MAX_DUMP_WQEBBS, @@ -323,7 +325,7 @@ static void tx_post_fence_nop(struct mlx5e_txqsq *sq) struct mlx5_wq_cyc *wq = &sq->wq; u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - tx_fill_wi(sq, pi, 1, NULL); + tx_fill_wi(sq, pi, 1, NULL, 0); mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc); } -- cgit v1.2.1 From a195784c105b2907b45fd62307d9ce821da9dc20 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Wed, 21 Aug 2019 15:47:29 +0300 Subject: net/mlx5e: Remove ethernet segment from dump WQE Dump WQE shall not include Ethernet segment. Define mlx5e_dump_wqe to be used for "Dump WQEs" instead of sharing it with the general mlx5e_tx_wqe layout. Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 26 +++++++++------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 0681735ea398..7833ddef0427 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -250,43 +250,37 @@ tx_post_resync_params(struct mlx5e_txqsq *sq, mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, skip_static_post, true); } +struct mlx5e_dump_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_data_seg data; +}; + static int tx_post_resync_dump(struct mlx5e_txqsq *sq, struct sk_buff *skb, skb_frag_t *frag, u32 tisn, bool first) { struct mlx5_wqe_ctrl_seg *cseg; - struct mlx5_wqe_eth_seg *eseg; struct mlx5_wqe_data_seg *dseg; - struct mlx5e_tx_wqe *wqe; + struct mlx5e_dump_wqe *wqe; dma_addr_t dma_addr = 0; - u16 ds_cnt, ds_cnt_inl; u8 num_wqebbs; - u16 pi, ihs; + u16 ds_cnt; int fsz; - - ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; - ihs = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb)); - ds_cnt_inl = 
DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS); - ds_cnt += ds_cnt_inl; - ds_cnt += 1; /* one frag */ + u16 pi; wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi); + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); cseg = &wqe->ctrl; - eseg = &wqe->eth; - dseg = wqe->data; + dseg = &wqe->data; cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); cseg->tisn = cpu_to_be32(tisn << 8); cseg->fm_ce_se = first ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; - eseg->inline_hdr.sz = cpu_to_be16(ihs); - memcpy(eseg->inline_hdr.start, skb->data, ihs); - dseg += ds_cnt_inl; - fsz = skb_frag_size(frag); dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, DMA_TO_DEVICE); -- cgit v1.2.1 From 08f5439f1df25a6cf6cf4c72cf6c13025599ce67 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 21 Aug 2019 22:19:11 -0600 Subject: io_uring: add need_resched() check in inner poll loop The outer poll loop checks for whether we need to reschedule, and returns to userspace if we do. However, it's possible to get stuck in the inner loop as well, if the CPU we are running on needs to reschedule to finish the IO work. Add the need_resched() check in the inner loop as well. This fixes a potential hang if the kernel is configured with CONFIG_PREEMPT_VOLUNTARY=y. 
Reported-by: Sagi Grimberg Reviewed-by: Sagi Grimberg Tested-by: Sagi Grimberg Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e7a43a354d91..cfb48bd088e1 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -778,7 +778,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events, long min) { - while (!list_empty(&ctx->poll_list)) { + while (!list_empty(&ctx->poll_list) && !need_resched()) { int ret; ret = io_do_iopoll(ctx, nr_events, min); @@ -805,6 +805,12 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx) unsigned int nr_events = 0; io_iopoll_getevents(ctx, &nr_events, 1); + + /* + * Ensure we allow local-to-the-cpu processing to take place, + * in this case we need to ensure that we reap all events. + */ + cond_resched(); } mutex_unlock(&ctx->uring_lock); } -- cgit v1.2.1 From e0917f879536cbf57367429d084775d8224c986c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 22 Jul 2019 09:12:56 +0200 Subject: um: fix time travel mode Unfortunately, my build fix for when time travel mode isn't enabled broke time travel mode, because I forgot that we need to use the timer time after the timer has been marked disabled, and thus need to leave the time stored instead of zeroing it. Fix that by splitting the inline into two, so we can call only the _mode() one in the relevant code path. 
Fixes: b482e48d29f1 ("um: fix build without CONFIG_UML_TIME_TRAVEL_SUPPORT") Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/include/shared/timer-internal.h | 14 ++++++++++---- arch/um/kernel/process.c | 2 +- arch/um/kernel/time.c | 16 +++++++++------- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/arch/um/include/shared/timer-internal.h b/arch/um/include/shared/timer-internal.h index 8574338bf23b..9991ec2371e4 100644 --- a/arch/um/include/shared/timer-internal.h +++ b/arch/um/include/shared/timer-internal.h @@ -34,10 +34,13 @@ static inline void time_travel_set_time(unsigned long long ns) time_travel_time = ns; } -static inline void time_travel_set_timer(enum time_travel_timer_mode mode, - unsigned long long expiry) +static inline void time_travel_set_timer_mode(enum time_travel_timer_mode mode) { time_travel_timer_mode = mode; +} + +static inline void time_travel_set_timer_expiry(unsigned long long expiry) +{ time_travel_timer_expiry = expiry; } #else @@ -50,8 +53,11 @@ static inline void time_travel_set_time(unsigned long long ns) { } -static inline void time_travel_set_timer(enum time_travel_timer_mode mode, - unsigned long long expiry) +static inline void time_travel_set_timer_mode(enum time_travel_timer_mode mode) +{ +} + +static inline void time_travel_set_timer_expiry(unsigned long long expiry) { } diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 67c0d1a860e9..6bede7888fc2 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -213,7 +213,7 @@ static void time_travel_sleep(unsigned long long duration) if (time_travel_timer_mode != TT_TMR_DISABLED || time_travel_timer_expiry < next) { if (time_travel_timer_mode == TT_TMR_ONESHOT) - time_travel_set_timer(TT_TMR_DISABLED, 0); + time_travel_set_timer_mode(TT_TMR_DISABLED); /* * time_travel_time will be adjusted in the timer * IRQ handler so it works even when the signal diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c 
index 6a051b078359..234757233355 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -50,7 +50,7 @@ void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) static int itimer_shutdown(struct clock_event_device *evt) { if (time_travel_mode != TT_MODE_OFF) - time_travel_set_timer(TT_TMR_DISABLED, 0); + time_travel_set_timer_mode(TT_TMR_DISABLED); if (time_travel_mode != TT_MODE_INFCPU) os_timer_disable(); @@ -62,9 +62,10 @@ static int itimer_set_periodic(struct clock_event_device *evt) { unsigned long long interval = NSEC_PER_SEC / HZ; - if (time_travel_mode != TT_MODE_OFF) - time_travel_set_timer(TT_TMR_PERIODIC, - time_travel_time + interval); + if (time_travel_mode != TT_MODE_OFF) { + time_travel_set_timer_mode(TT_TMR_PERIODIC); + time_travel_set_timer_expiry(time_travel_time + interval); + } if (time_travel_mode != TT_MODE_INFCPU) os_timer_set_interval(interval); @@ -77,9 +78,10 @@ static int itimer_next_event(unsigned long delta, { delta += 1; - if (time_travel_mode != TT_MODE_OFF) - time_travel_set_timer(TT_TMR_ONESHOT, - time_travel_time + delta); + if (time_travel_mode != TT_MODE_OFF) { + time_travel_set_timer_mode(TT_TMR_ONESHOT); + time_travel_set_timer_expiry(time_travel_time + delta); + } if (time_travel_mode != TT_MODE_INFCPU) return os_timer_one_shot(delta); -- cgit v1.2.1 From a71d9eff9394d24f05cbe115309152fb4543cd6c Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Wed, 21 Aug 2019 09:59:12 +0800 Subject: ocelot_ace: fix action of trap The trap action should be copying the frame to CPU and dropping it for forwarding, but current setting was just copying frame to CPU. Fixes: b596229448dd ("net: mscc: ocelot: Add support for tcam") Signed-off-by: Yangbo Lu Acked-by: Allan W. Nielsen Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mscc/ocelot_ace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot_ace.c b/drivers/net/ethernet/mscc/ocelot_ace.c index 39aca1ab4687..86fc6e6b46dd 100644 --- a/drivers/net/ethernet/mscc/ocelot_ace.c +++ b/drivers/net/ethernet/mscc/ocelot_ace.c @@ -317,7 +317,7 @@ static void is2_action_set(struct vcap_data *data, break; case OCELOT_ACL_ACTION_TRAP: VCAP_ACT_SET(PORT_MASK, 0x0); - VCAP_ACT_SET(MASK_MODE, 0x0); + VCAP_ACT_SET(MASK_MODE, 0x1); VCAP_ACT_SET(POLICE_ENA, 0x0); VCAP_ACT_SET(POLICE_IDX, 0x0); VCAP_ACT_SET(CPU_QU_NUM, 0x0); -- cgit v1.2.1 From de0e4fd2f07ce3bbdb69dfb8d9426b7227451b69 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Tue, 20 Aug 2019 23:46:36 -0500 Subject: qed: Add cleanup in qed_slowpath_start() If qed_mcp_send_drv_version() fails, no cleanup is executed, leading to memory leaks. To fix this issue, introduce the label 'err4' to perform the cleanup work before returning the error. Signed-off-by: Wenwen Wang Acked-by: Sudarsana Reddy Kalluru Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 829dd60ab937..1efff7f68ef6 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1325,7 +1325,7 @@ static int qed_slowpath_start(struct qed_dev *cdev, &drv_version); if (rc) { DP_NOTICE(cdev, "Failed sending drv version command\n"); - return rc; + goto err4; } } @@ -1333,6 +1333,8 @@ static int qed_slowpath_start(struct qed_dev *cdev, return 0; +err4: + qed_ll2_dealloc_if(cdev); err3: qed_hw_stop(cdev); err2: -- cgit v1.2.1 From b99328a60a482108f5195b4d611f90992ca016ba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 22 Aug 2019 13:00:15 +0200 Subject: timekeeping/vsyscall: Prevent math overflow in BOOTTIME update The VDSO update for CLOCK_BOOTTIME has an overflow issue as it shifts the nanoseconds based boot time offset left by the clocksource shift. That overflows once the boot time offset becomes large enough. As a consequence CLOCK_BOOTTIME in the VDSO becomes a random number causing applications to misbehave. Fix it by storing a timespec64 representation of the offset when boot time is adjusted and adding that to the MONOTONIC base time value in the vdso data page. Using the timespec64 representation avoids a 64bit division in the update code. 
Fixes: 44f57d788e7d ("timekeeping: Provide a generic update_vsyscall() implementation") Reported-by: Chris Clayton Signed-off-by: Thomas Gleixner Tested-by: Chris Clayton Tested-by: Vincenzo Frascino Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1908221257580.1983@nanos.tec.linutronix.de --- include/linux/timekeeper_internal.h | 5 +++++ kernel/time/timekeeping.c | 5 +++++ kernel/time/vsyscall.c | 22 +++++++++++++--------- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 7acb953298a7..84ff2844df2a 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -57,6 +57,7 @@ struct tk_read_base { * @cs_was_changed_seq: The sequence number of clocksource change events * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds + * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset * @cycle_interval: Number of clock cycles in one NTP interval * @xtime_interval: Number of clock shifted nano seconds in one NTP * interval. @@ -84,6 +85,9 @@ struct tk_read_base { * * wall_to_monotonic is no longer the boot time, getboottime must be * used instead. + * + * @monotonic_to_boottime is a timespec64 representation of @offs_boot to + * accelerate the VDSO update for CLOCK_BOOTTIME. 
*/ struct timekeeper { struct tk_read_base tkr_mono; @@ -99,6 +103,7 @@ struct timekeeper { u8 cs_was_changed_seq; ktime_t next_leap_ktime; u64 raw_sec; + struct timespec64 monotonic_to_boot; /* The following members are for timekeeping internal use */ u64 cycle_interval; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index d911c8470149..ca69290bee2a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -146,6 +146,11 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm) static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) { tk->offs_boot = ktime_add(tk->offs_boot, delta); + /* + * Timespec representation for VDSO update to avoid 64bit division + * on every update. + */ + tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot); } /* diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 8cf3596a4ce6..4bc37ac3bb05 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -17,7 +17,7 @@ static inline void update_vdso_data(struct vdso_data *vdata, struct timekeeper *tk) { struct vdso_timestamp *vdso_ts; - u64 nsec; + u64 nsec, sec; vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last; vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask; @@ -45,23 +45,27 @@ static inline void update_vdso_data(struct vdso_data *vdata, } vdso_ts->nsec = nsec; - /* CLOCK_MONOTONIC_RAW */ - vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW]; - vdso_ts->sec = tk->raw_sec; - vdso_ts->nsec = tk->tkr_raw.xtime_nsec; + /* Copy MONOTONIC time for BOOTTIME */ + sec = vdso_ts->sec; + /* Add the boot offset */ + sec += tk->monotonic_to_boot.tv_sec; + nsec += (u64)tk->monotonic_to_boot.tv_nsec << tk->tkr_mono.shift; /* CLOCK_BOOTTIME */ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME]; - vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - nsec = tk->tkr_mono.xtime_nsec; - nsec += ((u64)(tk->wall_to_monotonic.tv_nsec + - ktime_to_ns(tk->offs_boot)) << 
tk->tkr_mono.shift); + vdso_ts->sec = sec; + while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift); vdso_ts->sec++; } vdso_ts->nsec = nsec; + /* CLOCK_MONOTONIC_RAW */ + vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW]; + vdso_ts->sec = tk->raw_sec; + vdso_ts->nsec = tk->tkr_raw.xtime_nsec; + /* CLOCK_TAI */ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI]; vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset; -- cgit v1.2.1 From 75710f08ea7e41b2f7010da3f6deab061f7a853b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 21 Aug 2019 22:25:27 -0500 Subject: drm/amdgpu/powerplay: silence a warning in smu_v11_0_setup_pptable I think gcc is confused as I don't see how size could be used uninitialized, but go ahead and silence the warning. Signed-off-by: Alex Deucher Reviewed-by: Evan Quan Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20190822032527.1376-1-alexander.deucher@amd.com --- drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index 3ac061a3c3c5..53097961bf2b 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -326,7 +326,7 @@ static int smu_v11_0_setup_pptable(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; const struct smc_firmware_header_v1_0 *hdr; int ret, index; - uint32_t size; + uint32_t size = 0; uint16_t atom_table_size; uint8_t frev, crev; void *table; -- cgit v1.2.1 From f6edbf2d616435cda7823942c20005ce198e97c8 Mon Sep 17 00:00:00 2001 From: "Justin.Lee1@Dell.com" Date: Wed, 21 Aug 2019 21:24:52 +0000 Subject: net/ncsi: Fix the payload copying for the request coming from Netlink The request coming from Netlink should use the OEM generic handler. 
The standard command handler expects payload in bytes/words/dwords but the actual payload is stored in data if the request is coming from Netlink. Signed-off-by: Justin Lee Reviewed-by: Vijay Khemka Signed-off-by: David S. Miller --- net/ncsi/ncsi-cmd.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c index eab4346b0a39..0187e65176c0 100644 --- a/net/ncsi/ncsi-cmd.c +++ b/net/ncsi/ncsi-cmd.c @@ -309,14 +309,21 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca) int ncsi_xmit_cmd(struct ncsi_cmd_arg *nca) { + struct ncsi_cmd_handler *nch = NULL; struct ncsi_request *nr; + unsigned char type; struct ethhdr *eh; - struct ncsi_cmd_handler *nch = NULL; int i, ret; + /* Use OEM generic handler for Netlink request */ + if (nca->req_flags == NCSI_REQ_FLAG_NETLINK_DRIVEN) + type = NCSI_PKT_CMD_OEM; + else + type = nca->type; + /* Search for the handler */ for (i = 0; i < ARRAY_SIZE(ncsi_cmd_handlers); i++) { - if (ncsi_cmd_handlers[i].type == nca->type) { + if (ncsi_cmd_handlers[i].type == type) { if (ncsi_cmd_handlers[i].handler) nch = &ncsi_cmd_handlers[i]; else -- cgit v1.2.1 From c358ebf59634f06d8ed176da651ec150df3c8686 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 25 Jul 2019 15:40:01 -0400 Subject: drm/nouveau: Don't retry infinitely when receiving no data on i2c over AUX While I had thought I had fixed this issue in: commit 342406e4fbba ("drm/nouveau/i2c: Disable i2c bus access after ->fini()") It turns out that while I did fix the error messages I was seeing on my P50 when trying to access i2c busses with the GPU in runtime suspend, I accidentally had missed one important detail that was mentioned on the bug report this commit was supposed to fix: that the CPU would only lock up when trying to access i2c busses _on connected devices_ _while the GPU is not in runtime suspend_. Whoops. 
That definitely explains why I was not able to get my machine to hang with i2c bus interactions until now, as plugging my P50 into its dock with an HDMI monitor connected allowed me to finally reproduce this locally. Now that I have managed to reproduce this issue properly, it looks like the problem is much simpler than it looks. It turns out that some connected devices, such as MST laptop docks, will actually ACK i2c reads even if no data was actually read: [ 275.063043] nouveau 0000:01:00.0: i2c: aux 000a: 1: 0000004c 1 [ 275.063447] nouveau 0000:01:00.0: i2c: aux 000a: 00 01101000 10040000 [ 275.063759] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000001 [ 275.064024] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000 [ 275.064285] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000 [ 275.064594] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000 Because we don't handle the situation of i2c ack without any data, we end up entering an infinite loop in nvkm_i2c_aux_i2c_xfer() since the value of cnt always remains at 0. This finally properly explains how this could result in a CPU hang like the ones observed in the aforementioned commit. So, fix this by retrying transactions if no data is written or received, and give up and fail the transaction if we continue to not write or receive any data after 32 retries. Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c index b4e7404fe660..a11637b0f6cc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c @@ -40,8 +40,7 @@ nvkm_i2c_aux_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) u8 *ptr = msg->buf; while (remaining) { - u8 cnt = (remaining > 16) ? 
16 : remaining; - u8 cmd; + u8 cnt, retries, cmd; if (msg->flags & I2C_M_RD) cmd = 1; @@ -51,10 +50,19 @@ nvkm_i2c_aux_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) if (mcnt || remaining > 16) cmd |= 4; /* MOT */ - ret = aux->func->xfer(aux, true, cmd, msg->addr, ptr, &cnt); - if (ret < 0) { - nvkm_i2c_aux_release(aux); - return ret; + for (retries = 0, cnt = 0; + retries < 32 && !cnt; + retries++) { + cnt = min_t(u8, remaining, 16); + ret = aux->func->xfer(aux, true, cmd, + msg->addr, ptr, &cnt); + if (ret < 0) + goto out; + } + if (!cnt) { + AUX_TRACE(aux, "no data after 32 retries"); + ret = -EIO; + goto out; } ptr += cnt; @@ -64,8 +72,10 @@ nvkm_i2c_aux_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) msg++; } + ret = num; +out: nvkm_i2c_aux_release(aux); - return num; + return ret; } static u32 -- cgit v1.2.1 From 1fb254aa983bf190cfd685d40c64a480a9bafaee Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 22 Aug 2019 20:55:54 -0700 Subject: xfs: fix missing ILOCK unlock when xfs_setattr_nonsize fails due to EDQUOT Benjamin Moody reported to Debian that XFS partially wedges when a chgrp fails on account of being out of disk quota. I ran his reproducer script: # adduser dummy # adduser dummy plugdev # dd if=/dev/zero bs=1M count=100 of=test.img # mkfs.xfs test.img # mount -t xfs -o gquota test.img /mnt # mkdir -p /mnt/dummy # chown -c dummy /mnt/dummy # xfs_quota -xc 'limit -g bsoft=100k bhard=100k plugdev' /mnt (and then as user dummy) $ dd if=/dev/urandom bs=1M count=50 of=/mnt/dummy/foo $ chgrp plugdev /mnt/dummy/foo and saw: ================================================ WARNING: lock held when returning to user space! 5.3.0-rc5 #rc5 Tainted: G W ------------------------------------------------ chgrp/47006 is leaving the kernel with locks still held! 
1 lock held by chgrp/47006: #0: 000000006664ea2d (&xfs_nondir_ilock_class){++++}, at: xfs_ilock+0xd2/0x290 [xfs] ...which is clearly caused by xfs_setattr_nonsize failing to unlock the ILOCK after the xfs_qm_vop_chown_reserve call fails. Add the missing unlock. Reported-by: benjamin.moody@gmail.com Fixes: 253f4911f297 ("xfs: better xfs_trans_alloc interface") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Tested-by: Salvatore Bonaccorso --- fs/xfs/xfs_iops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ff3c1fae5357..fe285d123d69 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -793,6 +793,7 @@ xfs_setattr_nonsize( out_cancel: xfs_trans_cancel(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); out_dqrele: xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); -- cgit v1.2.1 From fbf0a7f44cdd4041b5a3e2b14deaa0adebaf40da Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 5 Aug 2019 12:54:01 +0200 Subject: drm/qxl: get vga ioports qxl has two modes: "native" (used by the drm driver) and "vga" (vga compatibility mode, typically used for boot display and firmware framebuffers). Accessing any vga ioport will switch the qxl device into vga mode. The qxl driver never does that, but other drivers accessing vga ports can trigger that too and therefore disturb qxl operation. So acquire the legacy vga ioports from vgaarb to avoid that. Reproducer: Boot kvm guest with both qxl and i915 vgpu, with qxl being first in pci scan order. v2: Skip this for secondary qxl cards which don't have vga mode in the first place (Frediano). 
Cc: Frediano Ziglio Signed-off-by: Gerd Hoffmann Reviewed-by: Dave Airlie Link: http://patchwork.freedesktop.org/patch/msgid/20190805105401.29874-1-kraxel@redhat.com --- drivers/gpu/drm/qxl/qxl_drv.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c index f33e349c4ec5..952201c6d821 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.c +++ b/drivers/gpu/drm/qxl/qxl_drv.c @@ -59,6 +59,11 @@ module_param_named(num_heads, qxl_num_crtc, int, 0400); static struct drm_driver qxl_driver; static struct pci_driver qxl_pci_driver; +static bool is_vga(struct pci_dev *pdev) +{ + return pdev->class == PCI_CLASS_DISPLAY_VGA << 8; +} + static int qxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -83,9 +88,17 @@ qxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto disable_pci; + if (is_vga(pdev)) { + ret = vga_get_interruptible(pdev, VGA_RSRC_LEGACY_IO); + if (ret) { + DRM_ERROR("can't get legacy vga ioports\n"); + goto disable_pci; + } + } + ret = qxl_device_init(qdev, &qxl_driver, pdev); if (ret) - goto disable_pci; + goto put_vga; ret = qxl_modeset_init(qdev); if (ret) @@ -105,6 +118,9 @@ modeset_cleanup: qxl_modeset_fini(qdev); unload: qxl_device_fini(qdev); +put_vga: + if (is_vga(pdev)) + vga_put(pdev, VGA_RSRC_LEGACY_IO); disable_pci: pci_disable_device(pdev); free_dev: @@ -122,6 +138,8 @@ qxl_pci_remove(struct pci_dev *pdev) qxl_modeset_fini(qdev); qxl_device_fini(qdev); + if (is_vga(pdev)) + vga_put(pdev, VGA_RSRC_LEGACY_IO); dev->dev_private = NULL; kfree(qdev); -- cgit v1.2.1 From 8090f7eb318d4241625449252db2741e7703e027 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 21 Aug 2019 21:32:26 +0300 Subject: drm/omap: Fix port lookup for SDI output When refactoring port lookup for DSS outputs, commit d17eb4537a7e ("drm/omap: Factor out common init/cleanup code for output devices") incorrectly hardcoded usage of DT port 0. 
This breaks operation for SDI (which uses the DT port 1) and DPI outputs other than DPI0 (which are not used in mainline DT sources). Fix this by using the port number from the output omap_dss_device of_ports field. Fixes: d17eb4537a7e ("drm/omap: Factor out common init/cleanup code for output devices") Signed-off-by: Laurent Pinchart Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20190821183226.13784-1-laurent.pinchart@ideasonboard.com Tested-by: Aaro Koskinen --- drivers/gpu/drm/omapdrm/dss/output.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/dss/output.c b/drivers/gpu/drm/omapdrm/dss/output.c index de0f882f0f7b..14b41de44ebc 100644 --- a/drivers/gpu/drm/omapdrm/dss/output.c +++ b/drivers/gpu/drm/omapdrm/dss/output.c @@ -4,6 +4,7 @@ * Author: Archit Taneja */ +#include #include #include #include @@ -20,7 +21,8 @@ int omapdss_device_init_output(struct omap_dss_device *out) { struct device_node *remote_node; - remote_node = of_graph_get_remote_node(out->dev->of_node, 0, 0); + remote_node = of_graph_get_remote_node(out->dev->of_node, + ffs(out->of_ports) - 1, 0); if (!remote_node) { dev_dbg(out->dev, "failed to find video sink\n"); return 0; -- cgit v1.2.1 From 9b2a0a1ef66f96bf34921a3865581eca32ff05ec Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 21 Aug 2019 13:12:09 +0200 Subject: drm/virtio: use virtio_max_dma_size We must make sure our scatterlist segments are not too big, otherwise we might see swiotlb failures (happens with sev, also reproducible with swiotlb=force).
Suggested-by: Laszlo Ersek Signed-off-by: Gerd Hoffmann Reviewed-by: Laszlo Ersek Link: http://patchwork.freedesktop.org/patch/msgid/20190821111210.27165-1-kraxel@redhat.com --- drivers/gpu/drm/virtio/virtgpu_object.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index b2da31310d24..09b526518f5a 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -204,6 +204,7 @@ int virtio_gpu_object_get_sg_table(struct virtio_gpu_device *qdev, .interruptible = false, .no_wait_gpu = false }; + size_t max_segment; /* wtf swapping */ if (bo->pages) @@ -215,8 +216,13 @@ int virtio_gpu_object_get_sg_table(struct virtio_gpu_device *qdev, if (!bo->pages) goto out; - ret = sg_alloc_table_from_pages(bo->pages, pages, nr_pages, 0, - nr_pages << PAGE_SHIFT, GFP_KERNEL); + max_segment = virtio_max_dma_size(qdev->vdev); + max_segment &= PAGE_MASK; + if (max_segment > SCATTERLIST_MAX_SEGMENT) + max_segment = SCATTERLIST_MAX_SEGMENT; + ret = __sg_alloc_table_from_pages(bo->pages, pages, nr_pages, 0, + nr_pages << PAGE_SHIFT, + max_segment, GFP_KERNEL); if (ret) goto out; return 0; -- cgit v1.2.1 From 48057ed1840fde9239b1e000bea1a0a1f07c5e99 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 20 Aug 2019 10:05:27 +0200 Subject: gpio: Fix irqchip initialization order The new API for registering a gpio_irq_chip along with a gpio_chip has a different semantic ordering than the old API which added the irqchip explicitly after registering the gpio_chip. Move the calls to add the gpio_irq_chip *last* in the function, so that the different hooks setting up OF and ACPI and machine gpio_chips are called *before* we try to register the interrupts, preserving the elder semantic order. This cropped up in the PL061 driver which used to work fine with no special ACPI quirks, but started to misbehave using the new API. 
Fixes: e0d897289813 ("gpio: Implement tighter IRQ chip integration") Cc: Thierry Reding Cc: Grygorii Strashko Cc: Andy Shevchenko Reported-by: Wei Xu Tested-by: Wei Xu Reported-by: Andy Shevchenko Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20190820080527.11796-1-linus.walleij@linaro.org --- drivers/gpio/gpiolib.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 80a2a2cb673b..cca749010cd0 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1373,21 +1373,13 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, if (status) goto err_remove_from_list; - status = gpiochip_irqchip_init_valid_mask(chip); - if (status) - goto err_remove_from_list; - status = gpiochip_alloc_valid_mask(chip); if (status) - goto err_remove_irqchip_mask; - - status = gpiochip_add_irqchip(chip, lock_key, request_key); - if (status) - goto err_free_gpiochip_mask; + goto err_remove_from_list; status = of_gpiochip_add(chip); if (status) - goto err_remove_chip; + goto err_free_gpiochip_mask; status = gpiochip_init_valid_mask(chip); if (status) @@ -1413,6 +1405,14 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, machine_gpiochip_add(chip); + status = gpiochip_irqchip_init_valid_mask(chip); + if (status) + goto err_remove_acpi_chip; + + status = gpiochip_add_irqchip(chip, lock_key, request_key); + if (status) + goto err_remove_irqchip_mask; + /* * By first adding the chardev, and then adding the device, * we get a device node entry in sysfs under @@ -1424,21 +1424,21 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, if (gpiolib_initialized) { status = gpiochip_setup_dev(gdev); if (status) - goto err_remove_acpi_chip; + goto err_remove_irqchip; } return 0; +err_remove_irqchip: + gpiochip_irqchip_remove(chip); +err_remove_irqchip_mask: + gpiochip_irqchip_free_valid_mask(chip); err_remove_acpi_chip: 
acpi_gpiochip_remove(chip); err_remove_of_chip: gpiochip_free_hogs(chip); of_gpiochip_remove(chip); -err_remove_chip: - gpiochip_irqchip_remove(chip); err_free_gpiochip_mask: gpiochip_free_valid_mask(chip); -err_remove_irqchip_mask: - gpiochip_irqchip_free_valid_mask(chip); err_remove_from_list: spin_lock_irqsave(&gpio_lock, flags); list_del(&gdev->list); -- cgit v1.2.1 From c51bc12d06b3a5494fbfcbd788a8e307932a06e9 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Mon, 1 Jul 2019 18:50:11 +0100 Subject: ARM: 8874/1: mm: only adjust sections of valid mm structures A timing hazard exists when an early fork/exec thread begins exiting and sets its mm pointer to NULL while a separate core tries to update the section information. This commit ensures that the mm pointer is not NULL before setting its section parameters. The arguments provided by commit 11ce4b33aedc ("ARM: 8672/1: mm: remove tasklist locking from update_sections_early()") are equally valid for not requiring grabbing the task_lock around this check. 
Fixes: 08925c2f124f ("ARM: 8464/1: Update all mm structures with section adjustments") Signed-off-by: Doug Berger Acked-by: Laura Abbott Cc: Mike Rapoport Cc: Andrew Morton Cc: Florian Fainelli Cc: Rob Herring Cc: "Steven Rostedt (VMware)" Cc: Peng Fan Cc: Geert Uytterhoeven Signed-off-by: Russell King --- arch/arm/mm/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 16d373d587c4..3a65ded832df 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -628,7 +628,8 @@ static void update_sections_early(struct section_perm perms[], int n) if (t->flags & PF_KTHREAD) continue; for_each_thread(t, s) - set_section_perms(perms, n, true, s->mm); + if (s->mm) + set_section_perms(perms, n, true, s->mm); } set_section_perms(perms, n, true, current->active_mm); set_section_perms(perms, n, true, &init_mm); -- cgit v1.2.1 From 69389837171140e2a94c5b8683c08dceaa8c9c8c Mon Sep 17 00:00:00 2001 From: Lvqiang Huang Date: Thu, 1 Aug 2019 08:15:23 +0100 Subject: ARM: 8897/1: check stmfd instruction using right shift In the commit ef41b5c92498 ("ARM: make kernel oops easier to read"), - .word 0xe92d0000 >> 10 @ stmfd sp!, {} + .word 0xe92d0000 >> 11 @ stmfd sp!, {} then the shift needs to change to 11.
Signed-off-by: Lvqiang Huang Signed-off-by: Chunyan Zhang Signed-off-by: Russell King --- arch/arm/lib/backtrace.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S index 1d5210eb4776..582925238d65 100644 --- a/arch/arm/lib/backtrace.S +++ b/arch/arm/lib/backtrace.S @@ -66,7 +66,7 @@ for_each_frame: tst frame, mask @ Check for address exceptions 1003: ldr r2, [sv_pc, #-4] @ if stmfd sp!, {args} exists, ldr r3, .Ldsi+4 @ adjust saved 'pc' back one - teq r3, r2, lsr #10 @ instruction + teq r3, r2, lsr #11 @ instruction subne r0, sv_pc, #4 @ allow for mov subeq r0, sv_pc, #8 @ allow for mov + stmia -- cgit v1.2.1 From 8f6a79112a360ff05b8aa4a9be081d3eb9057077 Mon Sep 17 00:00:00 2001 From: "james qian wang (Arm Technology China)" Date: Mon, 19 Aug 2019 08:01:57 +0000 Subject: drm/komeda: Fix error: not allocating enough data 1592 vs 1584 The patch 5d51f6c0da1b: "drm/komeda: Add writeback support" from May 23, 2019, leads to the following static checker warning: drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c:151 komeda_wb_connector_add() error: not allocating enough data 1592 vs 1584 This is a typo which misuse "wb_conn" but which should be "kwb_conn" to allocate the memory. 
Reported-by: Dan Carpenter Signed-off-by: james qian wang (Arm Technology China) Reviewed-by: Ayan Kumar Halder Link: https://patchwork.freedesktop.org/patch/msgid/20190819080136.10190-1-james.qian.wang@arm.com --- drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c index 617e1f7b8472..2851cac94d86 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c @@ -148,7 +148,7 @@ static int komeda_wb_connector_add(struct komeda_kms_dev *kms, if (!kcrtc->master->wb_layer) return 0; - kwb_conn = kzalloc(sizeof(*wb_conn), GFP_KERNEL); + kwb_conn = kzalloc(sizeof(*kwb_conn), GFP_KERNEL); if (!kwb_conn) return -ENOMEM; -- cgit v1.2.1 From 61d05b184963523e50729af8466b72e9c8a4f8be Mon Sep 17 00:00:00 2001 From: "james qian wang (Arm Technology China)" Date: Mon, 12 Aug 2019 11:23:41 +0000 Subject: drm/komeda: Fix warning -Wunused-but-set-variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed two -Wunused-but-set-variable warnings: /arm/linux/display/aosp-4.14-drm-next/drivers/gpu/drm/arm/display/komeda/komeda_kms.c: In function ‘komeda_crtc_normalize_zpos’: /arm/linux/display/aosp-4.14-drm-next/drivers/gpu/drm/arm/display/komeda/komeda_kms.c:150:26: warning: variable ‘fb’ set but not used [-Wunused-but-set-variable] struct drm_framebuffer *fb; ^~ /arm/linux/display/aosp-4.14-drm-next/drivers/gpu/drm/arm/display/komeda/komeda_kms.c: In function ‘komeda_kms_check’: /arm/linux/display/aosp-4.14-drm-next/drivers/gpu/drm/arm/display/komeda/komeda_kms.c:209:25: warning: variable ‘old_crtc_st’ set but not used [-Wunused-but-set-variable] struct drm_crtc_state *old_crtc_st, *new_crtc_st; ^~~~~~~~~~~ Signed-off-by: james qian wang (Arm Technology China) Reviewed-by: Ayan Kumar 
Halder Link: https://patchwork.freedesktop.org/patch/msgid/20190812112322.15990-1-james.qian.wang@arm.com --- drivers/gpu/drm/arm/display/komeda/komeda_kms.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c index d50e75f0b2bd..1f0e3f4e8d74 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c @@ -147,7 +147,6 @@ static int komeda_crtc_normalize_zpos(struct drm_crtc *crtc, struct komeda_crtc_state *kcrtc_st = to_kcrtc_st(crtc_st); struct komeda_plane_state *kplane_st; struct drm_plane_state *plane_st; - struct drm_framebuffer *fb; struct drm_plane *plane; struct list_head zorder_list; int order = 0, err; @@ -173,7 +172,6 @@ static int komeda_crtc_normalize_zpos(struct drm_crtc *crtc, list_for_each_entry(kplane_st, &zorder_list, zlist_node) { plane_st = &kplane_st->base; - fb = plane_st->fb; plane = plane_st->plane; plane_st->normalized_zpos = order++; @@ -206,7 +204,7 @@ static int komeda_kms_check(struct drm_device *dev, struct drm_atomic_state *state) { struct drm_crtc *crtc; - struct drm_crtc_state *old_crtc_st, *new_crtc_st; + struct drm_crtc_state *new_crtc_st; int i, err; err = drm_atomic_helper_check_modeset(dev, state); @@ -217,7 +215,7 @@ static int komeda_kms_check(struct drm_device *dev, * so need to add all affected_planes (even unchanged) to * drm_atomic_state. 
*/ - for_each_oldnew_crtc_in_state(state, crtc, old_crtc_st, new_crtc_st, i) { + for_each_new_crtc_in_state(state, crtc, new_crtc_st, i) { err = drm_atomic_add_affected_planes(state, crtc); if (err) return err; -- cgit v1.2.1 From 95abcd33ad513faf9cf3f54a28fb4982407e5a92 Mon Sep 17 00:00:00 2001 From: "james qian wang (Arm Technology China)" Date: Tue, 13 Aug 2019 11:08:20 +0000 Subject: drm/komeda: Clean warning 'komeda_component_add' might be a candidate for 'gnu_printf' komeda/komeda_pipeline.c: In function 'komeda_component_add': komeda/komeda_pipeline.c:212:3: warning: function 'komeda_component_add' might be a candidate for 'gnu_printf' format attribute [-Wsuggest-attribute=format] vsnprintf(c->name, sizeof(c->name), name_fmt, args); ^~~~~~~~~ Signed-off-by: james qian wang (Arm Technology China) Reviewed-by: Ayan Kumar Halder Link: https://patchwork.freedesktop.org/patch/msgid/20190813110759.10425-1-james.qian.wang@arm.com --- drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h index a90bcbb3cb23..14b683164544 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h +++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h @@ -480,6 +480,7 @@ void komeda_pipeline_dump_register(struct komeda_pipeline *pipe, struct seq_file *sf); /* component APIs */ +extern __printf(10, 11) struct komeda_component * komeda_component_add(struct komeda_pipeline *pipe, size_t comp_sz, u32 id, u32 hw_id, -- cgit v1.2.1 From 51a44a28eefd0d4c1addeb23fc5a599ff1787dfd Mon Sep 17 00:00:00 2001 From: Mihail Atanassov Date: Tue, 20 Aug 2019 15:16:58 +0000 Subject: drm/komeda: Add missing of_node_get() call komeda_pipeline_destroy has the matching of_node_put(). 
Fixes: 29e56aec911dd ("drm/komeda: Add DT parsing") Signed-off-by: Mihail Atanassov Reviewed-by: Ayan Kumar Halder [Rebased on the latest drm-misc-fixes] Signed-off-by: Ayan Kumar Halder Link: https://patchwork.freedesktop.org/patch/325278/ Change-Id: I5fa2479d6cb3a77182f1a92833c1c0bca8668cb4 --- drivers/gpu/drm/arm/display/komeda/komeda_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c index a0eabc134dd6..9d4d5075cc64 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c @@ -127,7 +127,7 @@ static int komeda_parse_pipe_dt(struct komeda_dev *mdev, struct device_node *np) pipe->of_output_port = of_graph_get_port_by_id(np, KOMEDA_OF_PORT_OUTPUT); - pipe->of_node = np; + pipe->of_node = of_node_get(np); return 0; } -- cgit v1.2.1 From 1cfd5d3399e87167b7f9157ef99daa0e959f395d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 23 Aug 2019 09:54:09 -0400 Subject: dm table: fix invalid memory accesses with too high sector number If the sector number is too high, dm_table_find_target() should return a pointer to a zeroed dm_target structure (the caller should test it with dm_target_is_valid). However, for some table sizes, the code in dm_table_find_target() that performs btree lookup will access out of bound memory structures. Fix this bug by testing the sector number at the beginning of dm_table_find_target(). Also, add an "inline" keyword to the function dm_table_get_size() because this is a hot path. 
Fixes: 512875bd9661 ("dm: table detect io beyond device") Cc: stable@vger.kernel.org Reported-by: Zhang Tao Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 7b6c3ee9e755..8820931ec7d2 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1342,7 +1342,7 @@ void dm_table_event(struct dm_table *t) } EXPORT_SYMBOL(dm_table_event); -sector_t dm_table_get_size(struct dm_table *t) +inline sector_t dm_table_get_size(struct dm_table *t) { return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0; } @@ -1367,6 +1367,9 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) unsigned int l, n = 0, k = 0; sector_t *node; + if (unlikely(sector >= dm_table_get_size(t))) + return &t->targets[t->num_targets]; + for (l = 0; l < t->depth; l++) { n = get_child(n, k); node = get_node(t, l, n); -- cgit v1.2.1 From b63f20a778c88b6a04458ed6ffc69da953d3a109 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 22 Aug 2019 14:11:22 -0700 Subject: x86/retpoline: Don't clobber RFLAGS during CALL_NOSPEC on i386 Use 'lea' instead of 'add' when adjusting %rsp in CALL_NOSPEC so as to avoid clobbering flags. KVM's emulator makes indirect calls into a jump table of sorts, where the destination of the CALL_NOSPEC is a small blob of code that performs fast emulation by executing the target instruction with fixed operands. adcb_al_dl: 0x000339f8 <+0>: adc %dl,%al 0x000339fa <+2>: ret A major motivation for doing fast emulation is to leverage the CPU to handle consumption and manipulation of arithmetic flags, i.e. RFLAGS is both an input and output to the target of CALL_NOSPEC. Clobbering flags results in all sorts of incorrect emulation, e.g. Jcc instructions often take the wrong path. Sans the nops...
asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" 0x0003595a <+58>: mov 0xc0(%ebx),%eax 0x00035960 <+64>: mov 0x60(%ebx),%edx 0x00035963 <+67>: mov 0x90(%ebx),%ecx 0x00035969 <+73>: push %edi 0x0003596a <+74>: popf 0x0003596b <+75>: call *%esi 0x000359a0 <+128>: pushf 0x000359a1 <+129>: pop %edi 0x000359a2 <+130>: mov %eax,0xc0(%ebx) 0x000359b1 <+145>: mov %edx,0x60(%ebx) ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); 0x000359a8 <+136>: mov -0x10(%ebp),%eax 0x000359ab <+139>: and $0x8d5,%edi 0x000359b4 <+148>: and $0xfffff72a,%eax 0x000359b9 <+153>: or %eax,%edi 0x000359bd <+157>: mov %edi,0x4(%ebx) For the most part this has gone unnoticed as emulation of guest code that can trigger fast emulation is effectively limited to MMIO when running on modern hardware, and MMIO is rarely, if ever, accessed by instructions that affect or consume flags. Breakage is almost instantaneous when running with unrestricted guest disabled, in which case KVM must emulate all instructions when the guest has invalid state, e.g. when the guest is in Big Real Mode during early BIOS. 
Fixes: 776b043848fd2 ("x86/retpoline: Add initial retpoline support") Fixes: 1a29b5b7f347a ("KVM: x86: Make indirect calls in emulator speculation safe") Signed-off-by: Sean Christopherson Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190822211122.27579-1-sean.j.christopherson@intel.com --- arch/x86/include/asm/nospec-branch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 109f974f9835..80bc209c0708 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -192,7 +192,7 @@ " lfence;\n" \ " jmp 902b;\n" \ " .align 16\n" \ - "903: addl $4, %%esp;\n" \ + "903: lea 4(%%esp), %%esp;\n" \ " pushl %[thunk_target];\n" \ " ret;\n" \ " .align 16\n" \ -- cgit v1.2.1 From c536277e0db1ad2e9fbb9dfd940c3565a14d9c52 Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Thu, 22 Aug 2019 19:37:38 +0200 Subject: RDMA/siw: Fix 64/32bit pointer inconsistency Fixes improper casting between addresses and unsigned types. Changes siw_pbl_get_buffer() function to return appropriate dma_addr_t, and not u64. Also fixes debug prints. Now any potentially kernel private pointers are printed formatted as '%pK', to allow keeping that information secret. 
Fixes: d941bfe500be ("RDMA/siw: Change CQ flags from 64->32 bits") Fixes: b0fff7317bb4 ("rdma/siw: completion queue methods") Fixes: 8b6a361b8c48 ("rdma/siw: receive path") Fixes: b9be6f18cf9e ("rdma/siw: transmit path") Fixes: f29dd55b0236 ("rdma/siw: queue pair methods") Fixes: 2251334dcac9 ("rdma/siw: application buffer management") Fixes: 303ae1cdfdf7 ("rdma/siw: application interface") Fixes: 6c52fdc244b5 ("rdma/siw: connection management") Fixes: a531975279f3 ("rdma/siw: main include file") Reported-by: Geert Uytterhoeven Reported-by: Jason Gunthorpe Reported-by: Leon Romanovsky Signed-off-by: Bernard Metzler Link: https://lore.kernel.org/r/20190822173738.26817-1-bmt@zurich.ibm.com Signed-off-by: Doug Ledford --- drivers/infiniband/sw/siw/siw.h | 8 ++-- drivers/infiniband/sw/siw/siw_cm.c | 77 ++++++++++++++++------------------- drivers/infiniband/sw/siw/siw_cq.c | 5 ++- drivers/infiniband/sw/siw/siw_mem.c | 14 +++---- drivers/infiniband/sw/siw/siw_mem.h | 2 +- drivers/infiniband/sw/siw/siw_qp.c | 2 +- drivers/infiniband/sw/siw/siw_qp_rx.c | 26 ++++++------ drivers/infiniband/sw/siw/siw_qp_tx.c | 43 +++++++++---------- drivers/infiniband/sw/siw/siw_verbs.c | 40 +++++++++--------- 9 files changed, 108 insertions(+), 109 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 77b1aabf6ff3..dba4535494ab 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -138,9 +138,9 @@ struct siw_umem { }; struct siw_pble { - u64 addr; /* Address of assigned user buffer */ - u64 size; /* Size of this entry */ - u64 pbl_off; /* Total offset from start of PBL */ + dma_addr_t addr; /* Address of assigned buffer */ + unsigned int size; /* Size of this entry */ + unsigned long pbl_off; /* Total offset from start of PBL */ }; struct siw_pbl { @@ -734,7 +734,7 @@ static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len) "MEM[0x%08x] %s: " fmt, mem->stag, __func__, ##__VA_ARGS__) #define 
siw_dbg_cep(cep, fmt, ...) \ - ibdev_dbg(&cep->sdev->base_dev, "CEP[0x%p] %s: " fmt, \ + ibdev_dbg(&cep->sdev->base_dev, "CEP[0x%pK] %s: " fmt, \ cep, __func__, ##__VA_ARGS__) void siw_cq_flush(struct siw_cq *cq); diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index fc97571a640b..1db5ad3d9580 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -355,8 +355,8 @@ static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason, getname_local(cep->sock, &event.local_addr); getname_peer(cep->sock, &event.remote_addr); } - siw_dbg_cep(cep, "[QP %u]: id 0x%p, reason=%d, status=%d\n", - cep->qp ? qp_id(cep->qp) : -1, id, reason, status); + siw_dbg_cep(cep, "[QP %u]: reason=%d, status=%d\n", + cep->qp ? qp_id(cep->qp) : UINT_MAX, reason, status); return id->event_handler(id, &event); } @@ -947,8 +947,6 @@ static void siw_accept_newconn(struct siw_cep *cep) siw_cep_get(new_cep); new_s->sk->sk_user_data = new_cep; - siw_dbg_cep(cep, "listen socket 0x%p, new 0x%p\n", s, new_s); - if (siw_tcp_nagle == false) { int val = 1; @@ -1011,7 +1009,8 @@ static void siw_cm_work_handler(struct work_struct *w) cep = work->cep; siw_dbg_cep(cep, "[QP %u]: work type: %d, state %d\n", - cep->qp ? qp_id(cep->qp) : -1, work->type, cep->state); + cep->qp ? qp_id(cep->qp) : UINT_MAX, + work->type, cep->state); siw_cep_set_inuse(cep); @@ -1145,9 +1144,9 @@ static void siw_cm_work_handler(struct work_struct *w) } if (release_cep) { siw_dbg_cep(cep, - "release: timer=%s, QP[%u], id 0x%p\n", + "release: timer=%s, QP[%u]\n", cep->mpa_timer ? "y" : "n", - cep->qp ? qp_id(cep->qp) : -1, cep->cm_id); + cep->qp ? qp_id(cep->qp) : UINT_MAX); siw_cancel_mpatimer(cep); @@ -1211,8 +1210,8 @@ int siw_cm_queue_work(struct siw_cep *cep, enum siw_work_type type) else delay = MPAREP_TIMEOUT; } - siw_dbg_cep(cep, "[QP %u]: work type: %d, work 0x%p, timeout %lu\n", - cep->qp ? 
qp_id(cep->qp) : -1, type, work, delay); + siw_dbg_cep(cep, "[QP %u]: work type: %d, timeout %lu\n", + cep->qp ? qp_id(cep->qp) : -1, type, delay); queue_delayed_work(siw_cm_wq, &work->work, delay); @@ -1376,16 +1375,16 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) } if (v4) siw_dbg_qp(qp, - "id 0x%p, pd_len %d, laddr %pI4 %d, raddr %pI4 %d\n", - id, pd_len, + "pd_len %d, laddr %pI4 %d, raddr %pI4 %d\n", + pd_len, &((struct sockaddr_in *)(laddr))->sin_addr, ntohs(((struct sockaddr_in *)(laddr))->sin_port), &((struct sockaddr_in *)(raddr))->sin_addr, ntohs(((struct sockaddr_in *)(raddr))->sin_port)); else siw_dbg_qp(qp, - "id 0x%p, pd_len %d, laddr %pI6 %d, raddr %pI6 %d\n", - id, pd_len, + "pd_len %d, laddr %pI6 %d, raddr %pI6 %d\n", + pd_len, &((struct sockaddr_in6 *)(laddr))->sin6_addr, ntohs(((struct sockaddr_in6 *)(laddr))->sin6_port), &((struct sockaddr_in6 *)(raddr))->sin6_addr, @@ -1508,8 +1507,7 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) if (rv >= 0) { rv = siw_cm_queue_work(cep, SIW_CM_WORK_MPATIMEOUT); if (!rv) { - siw_dbg_cep(cep, "id 0x%p, [QP %u]: exit\n", id, - qp_id(qp)); + siw_dbg_cep(cep, "[QP %u]: exit\n", qp_id(qp)); siw_cep_set_free(cep); return 0; } @@ -1581,7 +1579,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) siw_cancel_mpatimer(cep); if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) { - siw_dbg_cep(cep, "id 0x%p: out of state\n", id); + siw_dbg_cep(cep, "out of state\n"); siw_cep_set_free(cep); siw_cep_put(cep); @@ -1602,7 +1600,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) up_write(&qp->state_lock); goto error; } - siw_dbg_cep(cep, "id 0x%p\n", id); + siw_dbg_cep(cep, "[QP %d]\n", params->qpn); if (try_gso && cep->mpa.hdr.params.bits & MPA_RR_FLAG_GSO_EXP) { siw_dbg_cep(cep, "peer allows GSO on TX\n"); @@ -1612,8 +1610,8 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) params->ird > sdev->attrs.max_ird) { 
siw_dbg_cep( cep, - "id 0x%p, [QP %u]: ord %d (max %d), ird %d (max %d)\n", - id, qp_id(qp), params->ord, sdev->attrs.max_ord, + "[QP %u]: ord %d (max %d), ird %d (max %d)\n", + qp_id(qp), params->ord, sdev->attrs.max_ord, params->ird, sdev->attrs.max_ird); rv = -EINVAL; up_write(&qp->state_lock); @@ -1625,8 +1623,8 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) if (params->private_data_len > max_priv_data) { siw_dbg_cep( cep, - "id 0x%p, [QP %u]: private data length: %d (max %d)\n", - id, qp_id(qp), params->private_data_len, max_priv_data); + "[QP %u]: private data length: %d (max %d)\n", + qp_id(qp), params->private_data_len, max_priv_data); rv = -EINVAL; up_write(&qp->state_lock); goto error; @@ -1680,7 +1678,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) qp_attrs.flags = SIW_MPA_CRC; qp_attrs.state = SIW_QP_STATE_RTS; - siw_dbg_cep(cep, "id 0x%p, [QP%u]: moving to rts\n", id, qp_id(qp)); + siw_dbg_cep(cep, "[QP%u]: moving to rts\n", qp_id(qp)); /* Associate QP with CEP */ siw_cep_get(cep); @@ -1701,8 +1699,8 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) if (rv) goto error; - siw_dbg_cep(cep, "id 0x%p, [QP %u]: send mpa reply, %d byte pdata\n", - id, qp_id(qp), params->private_data_len); + siw_dbg_cep(cep, "[QP %u]: send mpa reply, %d byte pdata\n", + qp_id(qp), params->private_data_len); rv = siw_send_mpareqrep(cep, params->private_data, params->private_data_len); @@ -1760,14 +1758,14 @@ int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len) siw_cancel_mpatimer(cep); if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) { - siw_dbg_cep(cep, "id 0x%p: out of state\n", id); + siw_dbg_cep(cep, "out of state\n"); siw_cep_set_free(cep); siw_cep_put(cep); /* put last reference */ return -ECONNRESET; } - siw_dbg_cep(cep, "id 0x%p, cep->state %d, pd_len %d\n", id, cep->state, + siw_dbg_cep(cep, "cep->state %d, pd_len %d\n", cep->state, pd_len); if (__mpa_rr_revision(cep->mpa.hdr.params.bits) 
>= MPA_REVISION_1) { @@ -1805,14 +1803,14 @@ static int siw_listen_address(struct iw_cm_id *id, int backlog, rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val, sizeof(s_val)); if (rv) { - siw_dbg(id->device, "id 0x%p: setsockopt error: %d\n", id, rv); + siw_dbg(id->device, "setsockopt error: %d\n", rv); goto error; } rv = s->ops->bind(s, laddr, addr_family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)); if (rv) { - siw_dbg(id->device, "id 0x%p: socket bind error: %d\n", id, rv); + siw_dbg(id->device, "socket bind error: %d\n", rv); goto error; } cep = siw_cep_alloc(sdev); @@ -1825,13 +1823,13 @@ static int siw_listen_address(struct iw_cm_id *id, int backlog, rv = siw_cm_alloc_work(cep, backlog); if (rv) { siw_dbg(id->device, - "id 0x%p: alloc_work error %d, backlog %d\n", id, + "alloc_work error %d, backlog %d\n", rv, backlog); goto error; } rv = s->ops->listen(s, backlog); if (rv) { - siw_dbg(id->device, "id 0x%p: listen error %d\n", id, rv); + siw_dbg(id->device, "listen error %d\n", rv); goto error; } cep->cm_id = id; @@ -1915,8 +1913,7 @@ static void siw_drop_listeners(struct iw_cm_id *id) list_del(p); - siw_dbg_cep(cep, "id 0x%p: drop cep, state %d\n", id, - cep->state); + siw_dbg_cep(cep, "drop cep, state %d\n", cep->state); siw_cep_set_inuse(cep); @@ -1953,7 +1950,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) struct net_device *dev = to_siw_dev(id->device)->netdev; int rv = 0, listeners = 0; - siw_dbg(id->device, "id 0x%p: backlog %d\n", id, backlog); + siw_dbg(id->device, "backlog %d\n", backlog); /* * For each attached address of the interface, create a @@ -1969,8 +1966,8 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) s_raddr = (struct sockaddr_in *)&id->remote_addr; siw_dbg(id->device, - "id 0x%p: laddr %pI4:%d, raddr %pI4:%d\n", - id, &s_laddr.sin_addr, ntohs(s_laddr.sin_port), + "laddr %pI4:%d, raddr %pI4:%d\n", + &s_laddr.sin_addr, ntohs(s_laddr.sin_port), &s_raddr->sin_addr, 
ntohs(s_raddr->sin_port)); rtnl_lock(); @@ -1995,8 +1992,8 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) *s_raddr = &to_sockaddr_in6(id->remote_addr); siw_dbg(id->device, - "id 0x%p: laddr %pI6:%d, raddr %pI6:%d\n", - id, &s_laddr->sin6_addr, ntohs(s_laddr->sin6_port), + "laddr %pI6:%d, raddr %pI6:%d\n", + &s_laddr->sin6_addr, ntohs(s_laddr->sin6_port), &s_raddr->sin6_addr, ntohs(s_raddr->sin6_port)); read_lock_bh(&in6_dev->lock); @@ -2029,17 +2026,15 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) else if (!rv) rv = -EINVAL; - siw_dbg(id->device, "id 0x%p: %s\n", id, rv ? "FAIL" : "OK"); + siw_dbg(id->device, "%s\n", rv ? "FAIL" : "OK"); return rv; } int siw_destroy_listen(struct iw_cm_id *id) { - siw_dbg(id->device, "id 0x%p\n", id); - if (!id->provider_data) { - siw_dbg(id->device, "id 0x%p: no cep(s)\n", id); + siw_dbg(id->device, "no cep(s)\n"); return 0; } siw_drop_listeners(id); diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c index e381ae9b7d62..d8db3bee9da7 100644 --- a/drivers/infiniband/sw/siw/siw_cq.c +++ b/drivers/infiniband/sw/siw/siw_cq.c @@ -71,9 +71,10 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) wc->wc_flags = IB_WC_WITH_INVALIDATE; } wc->qp = cqe->base_qp; - siw_dbg_cq(cq, "idx %u, type %d, flags %2x, id 0x%p\n", + siw_dbg_cq(cq, + "idx %u, type %d, flags %2x, id 0x%pK\n", cq->cq_get % cq->num_cqe, cqe->opcode, - cqe->flags, (void *)cqe->id); + cqe->flags, (void *)(uintptr_t)cqe->id); } WRITE_ONCE(cqe->flags, 0); cq->cq_get++; diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c index 67171c82b0c4..87a56039f0ef 100644 --- a/drivers/infiniband/sw/siw/siw_mem.c +++ b/drivers/infiniband/sw/siw/siw_mem.c @@ -197,12 +197,12 @@ int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr, */ if (addr < mem->va || addr + len > mem->va + mem->len) { siw_dbg_pd(pd, "MEM interval len %d\n", len); - siw_dbg_pd(pd, "[0x%016llx, 0x%016llx] out of 
bounds\n", - (unsigned long long)addr, - (unsigned long long)(addr + len)); - siw_dbg_pd(pd, "[0x%016llx, 0x%016llx] STag=0x%08x\n", - (unsigned long long)mem->va, - (unsigned long long)(mem->va + mem->len), + siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n", + (void *)(uintptr_t)addr, + (void *)(uintptr_t)(addr + len)); + siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n", + (void *)(uintptr_t)mem->va, + (void *)(uintptr_t)(mem->va + mem->len), mem->stag); return -E_BASE_BOUNDS; @@ -330,7 +330,7 @@ out: * Optionally, provides remaining len within current element, and * current PBL index for later resume at same element. */ -u64 siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx) +dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx) { int i = idx ? *idx : 0; diff --git a/drivers/infiniband/sw/siw/siw_mem.h b/drivers/infiniband/sw/siw/siw_mem.h index f43daf280891..db138c8423da 100644 --- a/drivers/infiniband/sw/siw/siw_mem.h +++ b/drivers/infiniband/sw/siw/siw_mem.h @@ -9,7 +9,7 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable); void siw_umem_release(struct siw_umem *umem, bool dirty); struct siw_pbl *siw_pbl_alloc(u32 num_buf); -u64 siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx); +dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx); struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index); int siw_mem_add(struct siw_device *sdev, struct siw_mem *m); int siw_invalidate_stag(struct ib_pd *pd, u32 stag); diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 0990307c5d2c..430314c8abd9 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -949,7 +949,7 @@ skip_irq: rv = -EINVAL; goto out; } - wqe->sqe.sge[0].laddr = (u64)&wqe->sqe.sge[1]; + wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1]; wqe->sqe.sge[0].lkey = 0; wqe->sqe.num_sge = 1; } diff --git 
a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index f87657a11657..c0a887240325 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -38,9 +38,10 @@ static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem, p = siw_get_upage(umem, dest_addr); if (unlikely(!p)) { - pr_warn("siw: %s: [QP %u]: bogus addr: %p, %p\n", + pr_warn("siw: %s: [QP %u]: bogus addr: %pK, %pK\n", __func__, qp_id(rx_qp(srx)), - (void *)dest_addr, (void *)umem->fp_addr); + (void *)(uintptr_t)dest_addr, + (void *)(uintptr_t)umem->fp_addr); /* siw internal error */ srx->skb_copied += copied; srx->skb_new -= copied; @@ -50,7 +51,7 @@ static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem, pg_off = dest_addr & ~PAGE_MASK; bytes = min(len, (int)PAGE_SIZE - pg_off); - siw_dbg_qp(rx_qp(srx), "page %p, bytes=%u\n", p, bytes); + siw_dbg_qp(rx_qp(srx), "page %pK, bytes=%u\n", p, bytes); dest = kmap_atomic(p); rv = skb_copy_bits(srx->skb, srx->skb_offset, dest + pg_off, @@ -104,11 +105,11 @@ static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len) { int rv; - siw_dbg_qp(rx_qp(srx), "kva: 0x%p, len: %u\n", kva, len); + siw_dbg_qp(rx_qp(srx), "kva: 0x%pK, len: %u\n", kva, len); rv = skb_copy_bits(srx->skb, srx->skb_offset, kva, len); if (unlikely(rv)) { - pr_warn("siw: [QP %u]: %s, len %d, kva 0x%p, rv %d\n", + pr_warn("siw: [QP %u]: %s, len %d, kva 0x%pK, rv %d\n", qp_id(rx_qp(srx)), __func__, len, kva, rv); return rv; @@ -132,7 +133,7 @@ static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx, while (len) { int bytes; - u64 buf_addr = + dma_addr_t buf_addr = siw_pbl_get_buffer(pbl, offset, &bytes, pbl_idx); if (!buf_addr) break; @@ -485,8 +486,8 @@ int siw_proc_send(struct siw_qp *qp) mem_p = *mem; if (mem_p->mem_obj == NULL) rv = siw_rx_kva(srx, - (void *)(sge->laddr + frx->sge_off), - sge_bytes); + (void *)(uintptr_t)(sge->laddr + frx->sge_off), + sge_bytes); else if 
(!mem_p->is_pbl) rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + frx->sge_off, sge_bytes); @@ -598,8 +599,8 @@ int siw_proc_write(struct siw_qp *qp) if (mem->mem_obj == NULL) rv = siw_rx_kva(srx, - (void *)(srx->ddp_to + srx->fpdu_part_rcvd), - bytes); + (void *)(uintptr_t)(srx->ddp_to + srx->fpdu_part_rcvd), + bytes); else if (!mem->is_pbl) rv = siw_rx_umem(srx, mem->umem, srx->ddp_to + srx->fpdu_part_rcvd, bytes); @@ -841,8 +842,9 @@ int siw_proc_rresp(struct siw_qp *qp) bytes = min(srx->fpdu_part_rem, srx->skb_new); if (mem_p->mem_obj == NULL) - rv = siw_rx_kva(srx, (void *)(sge->laddr + wqe->processed), - bytes); + rv = siw_rx_kva(srx, + (void *)(uintptr_t)(sge->laddr + wqe->processed), + bytes); else if (!mem_p->is_pbl) rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed, bytes); diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index 42c63622c7bd..438a2917a47c 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -26,7 +26,7 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx) { struct siw_pbl *pbl = mem->pbl; u64 offset = addr - mem->va; - u64 paddr = siw_pbl_get_buffer(pbl, offset, NULL, idx); + dma_addr_t paddr = siw_pbl_get_buffer(pbl, offset, NULL, idx); if (paddr) return virt_to_page(paddr); @@ -37,7 +37,7 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx) /* * Copy short payload at provided destination payload address */ -static int siw_try_1seg(struct siw_iwarp_tx *c_tx, u64 paddr) +static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr) { struct siw_wqe *wqe = &c_tx->wqe_active; struct siw_sge *sge = &wqe->sqe.sge[0]; @@ -50,16 +50,16 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, u64 paddr) return 0; if (tx_flags(wqe) & SIW_WQE_INLINE) { - memcpy((void *)paddr, &wqe->sqe.sge[1], bytes); + memcpy(paddr, &wqe->sqe.sge[1], bytes); } else { struct siw_mem *mem = wqe->mem[0]; if 
(!mem->mem_obj) { /* Kernel client using kva */ - memcpy((void *)paddr, (void *)sge->laddr, bytes); + memcpy(paddr, + (const void *)(uintptr_t)sge->laddr, bytes); } else if (c_tx->in_syscall) { - if (copy_from_user((void *)paddr, - (const void __user *)sge->laddr, + if (copy_from_user(paddr, u64_to_user_ptr(sge->laddr), bytes)) return -EFAULT; } else { @@ -79,12 +79,12 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, u64 paddr) buffer = kmap_atomic(p); if (likely(PAGE_SIZE - off >= bytes)) { - memcpy((void *)paddr, buffer + off, bytes); + memcpy(paddr, buffer + off, bytes); kunmap_atomic(buffer); } else { unsigned long part = bytes - (PAGE_SIZE - off); - memcpy((void *)paddr, buffer + off, part); + memcpy(paddr, buffer + off, part); kunmap_atomic(buffer); if (!mem->is_pbl) @@ -98,7 +98,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, u64 paddr) return -EFAULT; buffer = kmap_atomic(p); - memcpy((void *)(paddr + part), buffer, + memcpy(paddr + part, buffer, bytes - part); kunmap_atomic(buffer); } @@ -166,7 +166,7 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx) c_tx->ctrl_len = sizeof(struct iwarp_send); crc = (char *)&c_tx->pkt.send_pkt.crc; - data = siw_try_1seg(c_tx, (u64)crc); + data = siw_try_1seg(c_tx, crc); break; case SIW_OP_SEND_REMOTE_INV: @@ -189,7 +189,7 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx) c_tx->ctrl_len = sizeof(struct iwarp_send_inv); crc = (char *)&c_tx->pkt.send_pkt.crc; - data = siw_try_1seg(c_tx, (u64)crc); + data = siw_try_1seg(c_tx, crc); break; case SIW_OP_WRITE: @@ -201,7 +201,7 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx) c_tx->ctrl_len = sizeof(struct iwarp_rdma_write); crc = (char *)&c_tx->pkt.write_pkt.crc; - data = siw_try_1seg(c_tx, (u64)crc); + data = siw_try_1seg(c_tx, crc); break; case SIW_OP_READ_RESPONSE: @@ -216,7 +216,7 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx) c_tx->ctrl_len = sizeof(struct iwarp_rdma_rresp); crc = (char *)&c_tx->pkt.write_pkt.crc; - data = 
siw_try_1seg(c_tx, (u64)crc); + data = siw_try_1seg(c_tx, crc); break; default: @@ -471,7 +471,8 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) * tx from kernel virtual address: either inline data * or memory region with assigned kernel buffer */ - iov[seg].iov_base = (void *)(sge->laddr + sge_off); + iov[seg].iov_base = + (void *)(uintptr_t)(sge->laddr + sge_off); iov[seg].iov_len = sge_len; if (do_crc) @@ -523,13 +524,13 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) page_address(p) + fp_off, plen); } else { - u64 pa = ((sge->laddr + sge_off) & PAGE_MASK); + u64 va = sge->laddr + sge_off; - page_array[seg] = virt_to_page(pa); + page_array[seg] = virt_to_page(va & PAGE_MASK); if (do_crc) crypto_shash_update( c_tx->mpa_crc_hd, - (void *)(sge->laddr + sge_off), + (void *)(uintptr_t)va, plen); } @@ -822,7 +823,8 @@ static int siw_qp_sq_proc_tx(struct siw_qp *qp, struct siw_wqe *wqe) rv = -EINVAL; goto tx_error; } - wqe->sqe.sge[0].laddr = (u64)&wqe->sqe.sge[1]; + wqe->sqe.sge[0].laddr = + (u64)(uintptr_t)&wqe->sqe.sge[1]; } } wqe->wr_status = SIW_WR_INPROGRESS; @@ -917,7 +919,7 @@ tx_error: static int siw_fastreg_mr(struct ib_pd *pd, struct siw_sqe *sqe) { - struct ib_mr *base_mr = (struct ib_mr *)sqe->base_mr; + struct ib_mr *base_mr = (struct ib_mr *)(uintptr_t)sqe->base_mr; struct siw_device *sdev = to_siw_dev(pd->device); struct siw_mem *mem = siw_mem_id2obj(sdev, sqe->rkey >> 8); int rv = 0; @@ -947,8 +949,7 @@ static int siw_fastreg_mr(struct ib_pd *pd, struct siw_sqe *sqe) mem->stag = sqe->rkey; mem->perms = sqe->access; - siw_dbg_mem(mem, "STag now valid, MR va: 0x%016llx -> 0x%016llx\n", - mem->va, base_mr->iova); + siw_dbg_mem(mem, "STag 0x%08x now valid\n", sqe->rkey); mem->va = base_mr->iova; mem->stag_valid = 1; out: diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index e7f3a2379d9d..da52c90e06d4 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ 
b/drivers/infiniband/sw/siw/siw_verbs.c @@ -424,8 +424,7 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, */ qp->srq = to_siw_srq(attrs->srq); qp->attrs.rq_size = 0; - siw_dbg(base_dev, "QP [%u]: [SRQ 0x%p] attached\n", - qp->qp_num, qp->srq); + siw_dbg(base_dev, "QP [%u]: SRQ attached\n", qp->qp_num); } else if (num_rqe) { if (qp->kernel_verbs) qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe)); @@ -610,7 +609,7 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) base_ucontext); struct siw_qp_attrs qp_attrs; - siw_dbg_qp(qp, "state %d, cep 0x%p\n", qp->attrs.state, qp->cep); + siw_dbg_qp(qp, "state %d\n", qp->attrs.state); /* * Mark QP as in process of destruction to prevent from @@ -662,7 +661,7 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, void *kbuf = &sqe->sge[1]; int num_sge = core_wr->num_sge, bytes = 0; - sqe->sge[0].laddr = (u64)kbuf; + sqe->sge[0].laddr = (uintptr_t)kbuf; sqe->sge[0].lkey = 0; while (num_sge--) { @@ -825,7 +824,7 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, break; case IB_WR_REG_MR: - sqe->base_mr = (uint64_t)reg_wr(wr)->mr; + sqe->base_mr = (uintptr_t)reg_wr(wr)->mr; sqe->rkey = reg_wr(wr)->key; sqe->access = reg_wr(wr)->access & IWARP_ACCESS_MASK; sqe->opcode = SIW_OP_REG_MR; @@ -842,8 +841,9 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, rv = -EINVAL; break; } - siw_dbg_qp(qp, "opcode %d, flags 0x%x, wr_id 0x%p\n", - sqe->opcode, sqe->flags, (void *)sqe->id); + siw_dbg_qp(qp, "opcode %d, flags 0x%x, wr_id 0x%pK\n", + sqe->opcode, sqe->flags, + (void *)(uintptr_t)sqe->id); if (unlikely(rv < 0)) break; @@ -1205,8 +1205,8 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, unsigned long mem_limit = rlimit(RLIMIT_MEMLOCK); int rv; - siw_dbg_pd(pd, "start: 0x%016llx, va: 0x%016llx, len: %llu\n", - (unsigned long long)start, (unsigned long long)rnic_va, + siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n", + (void 
*)(uintptr_t)start, (void *)(uintptr_t)rnic_va, (unsigned long long)len); if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) { @@ -1363,7 +1363,7 @@ int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, struct siw_mem *mem = mr->mem; struct siw_pbl *pbl = mem->pbl; struct siw_pble *pble; - u64 pbl_size; + unsigned long pbl_size; int i, rv; if (!pbl) { @@ -1402,16 +1402,18 @@ int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, pbl_size += sg_dma_len(slp); } siw_dbg_mem(mem, - "sge[%d], size %llu, addr 0x%016llx, total %llu\n", - i, pble->size, pble->addr, pbl_size); + "sge[%d], size %u, addr 0x%p, total %lu\n", + i, pble->size, (void *)(uintptr_t)pble->addr, + pbl_size); } rv = ib_sg_to_pages(base_mr, sl, num_sle, sg_off, siw_set_pbl_page); if (rv > 0) { mem->len = base_mr->length; mem->va = base_mr->iova; siw_dbg_mem(mem, - "%llu bytes, start 0x%016llx, %u SLE to %u entries\n", - mem->len, mem->va, num_sle, pbl->num_buf); + "%llu bytes, start 0x%pK, %u SLE to %u entries\n", + mem->len, (void *)(uintptr_t)mem->va, num_sle, + pbl->num_buf); } return rv; } @@ -1529,7 +1531,7 @@ int siw_create_srq(struct ib_srq *base_srq, } spin_lock_init(&srq->lock); - siw_dbg_pd(base_srq->pd, "[SRQ 0x%p]: success\n", srq); + siw_dbg_pd(base_srq->pd, "[SRQ]: success\n"); return 0; @@ -1650,8 +1652,7 @@ int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr, if (unlikely(!srq->kernel_verbs)) { siw_dbg_pd(base_srq->pd, - "[SRQ 0x%p]: no kernel post_recv for mapped srq\n", - srq); + "[SRQ]: no kernel post_recv for mapped srq\n"); rv = -EINVAL; goto out; } @@ -1673,8 +1674,7 @@ int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr, } if (unlikely(wr->num_sge > srq->max_sge)) { siw_dbg_pd(base_srq->pd, - "[SRQ 0x%p]: too many sge's: %d\n", srq, - wr->num_sge); + "[SRQ]: too many sge's: %d\n", wr->num_sge); rv = -EINVAL; break; } @@ -1693,7 +1693,7 @@ int siw_post_srq_recv(struct ib_srq *base_srq, 
const struct ib_recv_wr *wr, spin_unlock_irqrestore(&srq->lock, flags); out: if (unlikely(rv < 0)) { - siw_dbg_pd(base_srq->pd, "[SRQ 0x%p]: error %d\n", srq, rv); + siw_dbg_pd(base_srq->pd, "[SRQ]: error %d\n", rv); *bad_wr = wr; } return rv; -- cgit v1.2.1 From a15d56a60760aa9dbe26343b9a0ac5228f35d445 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 22 Aug 2019 08:55:36 +0200 Subject: batman-adv: Only read OGM tvlv_len after buffer len check Multiple batadv_ogm_packet can be stored in an skbuff. The functions batadv_iv_ogm_send_to_if()/batadv_iv_ogm_receive() use batadv_iv_ogm_aggr_packet() to check if there is another additional batadv_ogm_packet in the skb or not before they continue processing the packet. The length for such an OGM is BATADV_OGM_HLEN + batadv_ogm_packet->tvlv_len. The check must first check that at least BATADV_OGM_HLEN bytes are available before it accesses tvlv_len (which is part of the header. Otherwise it might try read outside of the currently available skbuff to get the content of tvlv_len. Fixes: ef26157747d4 ("batman-adv: tvlv - basic infrastructure") Reported-by: syzbot+355cab184197dbbfa384@syzkaller.appspotmail.com Signed-off-by: Sven Eckelmann Acked-by: Antonio Quartulli Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_iv_ogm.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 240ed70912d6..d78938e3e008 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -277,17 +277,23 @@ static u8 batadv_hop_penalty(u8 tq, const struct batadv_priv *bat_priv) * batadv_iv_ogm_aggr_packet() - checks if there is another OGM attached * @buff_pos: current position in the skb * @packet_len: total length of the skb - * @tvlv_len: tvlv length of the previously considered OGM + * @ogm_packet: potential OGM in buffer * * Return: true if there is enough space for another OGM, false otherwise. 
*/ -static bool batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len, - __be16 tvlv_len) +static bool +batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len, + const struct batadv_ogm_packet *ogm_packet) { int next_buff_pos = 0; - next_buff_pos += buff_pos + BATADV_OGM_HLEN; - next_buff_pos += ntohs(tvlv_len); + /* check if there is enough space for the header */ + next_buff_pos += buff_pos + sizeof(*ogm_packet); + if (next_buff_pos > packet_len) + return false; + + /* check if there is enough space for the optional TVLV */ + next_buff_pos += ntohs(ogm_packet->tvlv_len); return (next_buff_pos <= packet_len) && (next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES); @@ -315,7 +321,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, /* adjust all flags and log packets */ while (batadv_iv_ogm_aggr_packet(buff_pos, forw_packet->packet_len, - batadv_ogm_packet->tvlv_len)) { + batadv_ogm_packet)) { /* we might have aggregated direct link packets with an * ordinary base packet */ @@ -1704,7 +1710,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, /* unpack the aggregated packets and process them one by one */ while (batadv_iv_ogm_aggr_packet(ogm_offset, skb_headlen(skb), - ogm_packet->tvlv_len)) { + ogm_packet)) { batadv_iv_ogm_process(skb, ogm_offset, if_incoming); ogm_offset += BATADV_OGM_HLEN; -- cgit v1.2.1 From 0ff0f15a32c093381ad1abc06abe85afb561ab28 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 22 Aug 2019 08:55:36 +0200 Subject: batman-adv: Only read OGM2 tvlv_len after buffer len check Multiple batadv_ogm2_packet can be stored in an skbuff. The functions batadv_v_ogm_send_to_if() uses batadv_v_ogm_aggr_packet() to check if there is another additional batadv_ogm2_packet in the skb or not before they continue processing the packet. The length for such an OGM2 is BATADV_OGM2_HLEN + batadv_ogm2_packet->tvlv_len. 
The check must first check that at least BATADV_OGM2_HLEN bytes are available before it accesses tvlv_len (which is part of the header). Otherwise it might try to read outside of the currently available skbuff to get the content of tvlv_len. Fixes: 9323158ef9f4 ("batman-adv: OGMv2 - implement originators logic") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v_ogm.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index fad95ef64e01..bc06e3cdfa84 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -631,17 +631,23 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv, * batadv_v_ogm_aggr_packet() - checks if there is another OGM aggregated * @buff_pos: current position in the skb * @packet_len: total length of the skb - * @tvlv_len: tvlv length of the previously considered OGM + * @ogm2_packet: potential OGM2 in buffer * * Return: true if there is enough space for another OGM, false otherwise. 
*/ -static bool batadv_v_ogm_aggr_packet(int buff_pos, int packet_len, - __be16 tvlv_len) +static bool +batadv_v_ogm_aggr_packet(int buff_pos, int packet_len, + const struct batadv_ogm2_packet *ogm2_packet) { int next_buff_pos = 0; - next_buff_pos += buff_pos + BATADV_OGM2_HLEN; - next_buff_pos += ntohs(tvlv_len); + /* check if there is enough space for the header */ + next_buff_pos += buff_pos + sizeof(*ogm2_packet); + if (next_buff_pos > packet_len) + return false; + + /* check if there is enough space for the optional TVLV */ + next_buff_pos += ntohs(ogm2_packet->tvlv_len); return (next_buff_pos <= packet_len) && (next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES); @@ -818,7 +824,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb, ogm_packet = (struct batadv_ogm2_packet *)skb->data; while (batadv_v_ogm_aggr_packet(ogm_offset, skb_headlen(skb), - ogm_packet->tvlv_len)) { + ogm_packet)) { batadv_v_ogm_process(skb, ogm_offset, if_incoming); ogm_offset += BATADV_OGM2_HLEN; -- cgit v1.2.1 From 2e16f3e926ed48373c98edea85c6ad0ef69425d1 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 23 Aug 2019 11:34:16 +0100 Subject: KVM: arm/arm64: VGIC: Properly initialise private IRQ affinity At the moment we initialise the target *mask* of a virtual IRQ to the VCPU it belongs to, even though this mask is only defined for GICv2 and quickly runs out of bits for many GICv3 guests. This behaviour triggers an UBSAN complaint for more than 32 VCPUs: ------ [ 5659.462377] UBSAN: Undefined behaviour in virt/kvm/arm/vgic/vgic-init.c:223:21 [ 5659.471689] shift exponent 32 is too large for 32-bit type 'unsigned int' ------ Also for GICv3 guests the reporting of TARGET in the "vgic-state" debugfs dump is wrong, due to this very same problem. Because there is no requirement to create the VGIC device before the VCPUs (and QEMU actually does it the other way round), we can't safely initialise mpidr or targets in kvm_vgic_vcpu_init(). 
But since we touch every private IRQ for each VCPU anyway later (in vgic_init()), we can just move the initialisation of those fields into there, where we definitely know the VGIC type. On the way make sure we really have either a VGICv2 or a VGICv3 device, since the existing code is just checking for "VGICv3 or not", silently ignoring the uninitialised case. Signed-off-by: Andre Przywara Reported-by: Dave Martin Tested-by: Julien Grall Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-init.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index bdbc297d06fb..e621b5d45b27 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "vgic.h" @@ -164,12 +165,18 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) irq->vcpu = NULL; irq->target_vcpu = vcpu0; kref_init(&irq->refcount); - if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) { + switch (dist->vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V2: irq->targets = 0; irq->group = 0; - } else { + break; + case KVM_DEV_TYPE_ARM_VGIC_V3: irq->mpidr = 0; irq->group = 1; + break; + default: + kfree(dist->spis); + return -EINVAL; } } return 0; @@ -209,7 +216,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) irq->intid = i; irq->vcpu = NULL; irq->target_vcpu = vcpu; - irq->targets = 1U << vcpu->vcpu_id; kref_init(&irq->refcount); if (vgic_irq_is_sgi(i)) { /* SGIs */ @@ -219,11 +225,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) /* PPIs */ irq->config = VGIC_CONFIG_LEVEL; } - - if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) - irq->group = 1; - else - irq->group = 0; } if (!irqchip_in_kernel(vcpu->kvm)) @@ -286,10 +287,19 @@ int vgic_init(struct kvm *kvm) for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; - if (dist->vgic_model == 
KVM_DEV_TYPE_ARM_VGIC_V3) + switch (dist->vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V3: irq->group = 1; - else + irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + break; + case KVM_DEV_TYPE_ARM_VGIC_V2: irq->group = 0; + irq->targets = 1U << idx; + break; + default: + ret = -EINVAL; + goto out; + } } } -- cgit v1.2.1 From db0b99f59ae4d934a0af1a5670706d7c2a4b58ea Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 23 Aug 2019 15:44:36 +0200 Subject: ipv6: propagate ipv6_add_dev's error returns out of ipv6_find_idev Currently, ipv6_find_idev returns NULL when ipv6_add_dev fails, ignoring the specific error value. This results in addrconf_add_dev returning ENOBUFS in all cases, which is unfortunate in cases such as: # ip link add dummyX type dummy # ip link set dummyX mtu 1200 up # ip addr add 2000::/64 dev dummyX RTNETLINK answers: No buffer space available Commit a317a2f19da7 ("ipv6: fail early when creating netdev named all or default") introduced error returns in ipv6_add_dev. Before that, that function would simply return NULL for all failures. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ced995f3fec4..6a576ff92c39 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -478,7 +478,7 @@ static struct inet6_dev *ipv6_find_idev(struct net_device *dev) if (!idev) { idev = ipv6_add_dev(dev); if (IS_ERR(idev)) - return NULL; + return idev; } if (dev->flags&IFF_UP) @@ -2466,8 +2466,8 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) ASSERT_RTNL(); idev = ipv6_find_idev(dev); - if (!idev) - return ERR_PTR(-ENOBUFS); + if (IS_ERR(idev)) + return idev; if (idev->cnf.disable_ipv6) return ERR_PTR(-EACCES); @@ -3159,7 +3159,7 @@ static void init_loopback(struct net_device *dev) ASSERT_RTNL(); idev = ipv6_find_idev(dev); - if (!idev) { + if (IS_ERR(idev)) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -3374,7 +3374,7 @@ static void addrconf_sit_config(struct net_device *dev) */ idev = ipv6_find_idev(dev); - if (!idev) { + if (IS_ERR(idev)) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -3399,7 +3399,7 @@ static void addrconf_gre_config(struct net_device *dev) ASSERT_RTNL(); idev = ipv6_find_idev(dev); - if (!idev) { + if (IS_ERR(idev)) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -4773,8 +4773,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC; idev = ipv6_find_idev(dev); - if (!idev) - return -ENOBUFS; + if (IS_ERR(idev)) + return PTR_ERR(idev); if (!ipv6_allow_optimistic_dad(net, idev)) cfg.ifa_flags &= ~IFA_F_OPTIMISTIC; -- cgit v1.2.1 From 345b93265b3a3d001ec23b696b66059395238d16 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 23 Aug 2019 19:57:49 +0200 Subject: Revert "r8169: remove not needed call to dma_sync_single_for_device" This reverts commit f072218cca5b076dd99f3dfa3aaafedfd0023a51. As reported by Aaro this patch causes network problems on MIPS Loongson platform. 
Therefore revert it. Fixes: f072218cca5b ("r8169: remove not needed call to dma_sync_single_for_device") Signed-off-by: Heiner Kallweit Reported-by: Aaro Koskinen Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index e1dd6ea60d67..bae0074ab9aa 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5921,6 +5921,7 @@ static struct sk_buff *rtl8169_try_rx_copy(void *data, skb = napi_alloc_skb(&tp->napi, pkt_size); if (skb) skb_copy_to_linear_data(skb, data, pkt_size); + dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE); return skb; } -- cgit v1.2.1 From db38de39684dda2bf307f41797db2831deba64e9 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 21 Aug 2019 14:17:20 +0200 Subject: flow_dissector: Fix potential use-after-free on BPF_PROG_DETACH Call to bpf_prog_put(), with help of call_rcu(), queues an RCU-callback to free the program once a grace period has elapsed. The callback can run together with new RCU readers that started after the last grace period. New RCU readers can potentially see the "old" to-be-freed or already-freed pointer to the program object before the RCU update-side NULLs it. Reorder the operations so that the RCU update-side resets the protected pointer before the end of the grace period after which the program will be freed. 
Fixes: d58e468b1112 ("flow_dissector: implements flow dissector BPF hook") Reported-by: Lorenz Bauer Signed-off-by: Jakub Sitnicki Acked-by: Petar Penkov Signed-off-by: Daniel Borkmann --- net/core/flow_dissector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 3e6fedb57bc1..2470b4b404e6 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -142,8 +142,8 @@ int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) mutex_unlock(&flow_dissector_mutex); return -ENOENT; } - bpf_prog_put(attached); RCU_INIT_POINTER(net->flow_dissector_prog, NULL); + bpf_prog_put(attached); mutex_unlock(&flow_dissector_mutex); return 0; } -- cgit v1.2.1 From 6754172c208d9d3dae208c6494611ac167d56688 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 21 Aug 2019 14:07:10 -0700 Subject: bpf: fix precision tracking in presence of bpf2bpf calls While adding extra tests for precision tracking and extra infra to adjust verifier heuristics the existing test "calls: cross frame pruning - liveness propagation" started to fail. The root cause is the same as described in verifier.c comment: * Also if parent's curframe > frame where backtracking started, * the verifier need to mark registers in both frames, otherwise callees * may incorrectly prune callers. This is similar to * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences") * For now backtracking falls back into conservative marking. Turned out though that returning -ENOTSUPP from backtrack_insn() and doing mark_all_scalars_precise() in the current parentage chain is not enough. Depending on how is_state_visited() heuristic is creating parentage chain it's possible that callee will incorrectly prune caller. Fix the issue by setting precise=true earlier and more aggressively. Before this fix the precision tracking _within_ functions that don't do bpf2bpf calls would still work. 
Whereas now precision tracking is completely disabled when bpf2bpf calls are present anywhere in the program. No difference in cilium tests (they don't have bpf2bpf calls). No difference in test_progs though some of them have bpf2bpf calls, but precision tracking wasn't effective there. Fixes: b5dc0163d8fd ("bpf: precise scalar_value tracking") Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c84d83f86141..b5c14c9d7b98 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -985,9 +985,6 @@ static void __mark_reg_unbounded(struct bpf_reg_state *reg) reg->smax_value = S64_MAX; reg->umin_value = 0; reg->umax_value = U64_MAX; - - /* constant backtracking is enabled for root only for now */ - reg->precise = capable(CAP_SYS_ADMIN) ? false : true; } /* Mark a register as having a completely unknown (scalar) value. */ @@ -1014,7 +1011,11 @@ static void mark_reg_unknown(struct bpf_verifier_env *env, __mark_reg_not_init(regs + regno); return; } - __mark_reg_unknown(regs + regno); + regs += regno; + __mark_reg_unknown(regs); + /* constant backtracking is enabled for root without bpf2bpf calls */ + regs->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ? + true : false; } static void __mark_reg_not_init(struct bpf_reg_state *reg) -- cgit v1.2.1 From c751798aa224fadc5124b49eeb38fb468c0fa039 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 Aug 2019 22:14:23 +0200 Subject: bpf: fix use after free in prog symbol exposure syzkaller managed to trigger the warning in bpf_jit_free() which checks via bpf_prog_kallsyms_verify_off() for potentially unlinked JITed BPF progs in kallsyms, and subsequently trips over GPF when walking kallsyms entries: [...] 
8021q: adding VLAN 0 to HW filter on device batadv0 8021q: adding VLAN 0 to HW filter on device batadv0 WARNING: CPU: 0 PID: 9869 at kernel/bpf/core.c:810 bpf_jit_free+0x1e8/0x2a0 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 9869 Comm: kworker/0:7 Not tainted 5.0.0-rc8+ #1 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events bpf_prog_free_deferred Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x113/0x167 lib/dump_stack.c:113 panic+0x212/0x40b kernel/panic.c:214 __warn.cold.8+0x1b/0x38 kernel/panic.c:571 report_bug+0x1a4/0x200 lib/bug.c:186 fixup_bug arch/x86/kernel/traps.c:178 [inline] do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:271 do_invalid_op+0x36/0x40 arch/x86/kernel/traps.c:290 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:973 RIP: 0010:bpf_jit_free+0x1e8/0x2a0 Code: 02 4c 89 e2 83 e2 07 38 d0 7f 08 84 c0 0f 85 86 00 00 00 48 ba 00 02 00 00 00 00 ad de 0f b6 43 02 49 39 d6 0f 84 5f fe ff ff <0f> 0b e9 58 fe ff ff 48 b8 00 00 00 00 00 fc ff df 4c 89 e2 48 c1 RSP: 0018:ffff888092f67cd8 EFLAGS: 00010202 RAX: 0000000000000007 RBX: ffffc90001947000 RCX: ffffffff816e9d88 RDX: dead000000000200 RSI: 0000000000000008 RDI: ffff88808769f7f0 RBP: ffff888092f67d00 R08: fffffbfff1394059 R09: fffffbfff1394058 R10: fffffbfff1394058 R11: ffffffff89ca02c7 R12: ffffc90001947002 R13: ffffc90001947020 R14: ffffffff881eca80 R15: ffff88808769f7e8 BUG: unable to handle kernel paging request at fffffbfff400d000 #PF error: [normal kernel read fault] PGD 21ffee067 P4D 21ffee067 PUD 21ffed067 PMD 9f942067 PTE 0 Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 9869 Comm: kworker/0:7 Not tainted 5.0.0-rc8+ #1 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events bpf_prog_free_deferred RIP: 0010:bpf_get_prog_addr_region kernel/bpf/core.c:495 [inline] RIP: 0010:bpf_tree_comp kernel/bpf/core.c:558 [inline] RIP: 0010:__lt_find 
include/linux/rbtree_latch.h:115 [inline] RIP: 0010:latch_tree_find include/linux/rbtree_latch.h:208 [inline] RIP: 0010:bpf_prog_kallsyms_find+0x107/0x2e0 kernel/bpf/core.c:632 Code: 00 f0 ff ff 44 38 c8 7f 08 84 c0 0f 85 fa 00 00 00 41 f6 45 02 01 75 02 0f 0b 48 39 da 0f 82 92 00 00 00 48 89 d8 48 c1 e8 03 <42> 0f b6 04 30 84 c0 74 08 3c 03 0f 8e 45 01 00 00 8b 03 48 c1 e0 [...] Upon further debugging, it turns out that whenever we trigger this issue, the kallsyms removal in bpf_prog_ksym_node_del() was /skipped/ but yet bpf_jit_free() reported that the entry is /in use/. Problem is that symbol exposure via bpf_prog_kallsyms_add() but also perf_event_bpf_event() were done /after/ bpf_prog_new_fd(). Once the fd is exposed to the public, a parallel close request came in right before we attempted to do the bpf_prog_kallsyms_add(). Given at this time the prog reference count is one, we start to rip everything underneath us via bpf_prog_release() -> bpf_prog_put(). The memory is eventually released via deferred free, so we're seeing that bpf_jit_free() has a kallsym entry because we added it from bpf_prog_load() but /after/ bpf_prog_put() from the remote CPU. Therefore, move both notifications /before/ we install the fd. The issue was never seen between bpf_prog_alloc_id() and bpf_prog_new_fd() because upon bpf_prog_get_fd_by_id() we'll take another reference to the BPF prog, so we're still holding the original reference from the bpf_prog_load(). 
Fixes: 6ee52e2a3fe4 ("perf, bpf: Introduce PERF_RECORD_BPF_EVENT") Fixes: 74451e66d516 ("bpf: make jited programs visible in traces") Reported-by: syzbot+bd3bba6ff3fcea7a6ec6@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Cc: Song Liu --- kernel/bpf/syscall.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 5d141f16f6fa..272071e9112f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1707,20 +1707,26 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) if (err) goto free_used_maps; - err = bpf_prog_new_fd(prog); - if (err < 0) { - /* failed to allocate fd. - * bpf_prog_put() is needed because the above - * bpf_prog_alloc_id() has published the prog - * to the userspace and the userspace may - * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID. - */ - bpf_prog_put(prog); - return err; - } - + /* Upon success of bpf_prog_alloc_id(), the BPF prog is + * effectively publicly exposed. However, retrieving via + * bpf_prog_get_fd_by_id() will take another reference, + * therefore it cannot be gone underneath us. + * + * Only for the time /after/ successful bpf_prog_new_fd() + * and before returning to userspace, we might just hold + * one reference and any parallel close on that fd could + * rip everything out. Hence, below notifications must + * happen before bpf_prog_new_fd(). + * + * Also, any failure handling from this point onwards must + * be using bpf_prog_put() given the program is exposed. 
+ */ bpf_prog_kallsyms_add(prog); perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0); + + err = bpf_prog_new_fd(prog); + if (err < 0) + bpf_prog_put(prog); return err; free_used_maps: -- cgit v1.2.1 From 2c238177bd7f4b14bdf7447cc1cd9bb791f147e6 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 20 Aug 2019 17:50:25 +0200 Subject: bpf: allow narrow loads of some sk_reuseport_md fields with offset > 0 test_select_reuseport fails on s390 due to verifier rejecting test_select_reuseport_kern.o with the following message: ; data_check.eth_protocol = reuse_md->eth_protocol; 18: (69) r1 = *(u16 *)(r6 +22) invalid bpf_context access off=22 size=2 This is because on big-endian machines casts from __u32 to __u16 are generated by referencing the respective variable as __u16 with an offset of 2 (as opposed to 0 on little-endian machines). The verifier already has all the infrastructure in place to allow such accesses, it's just that they are not explicitly enabled for eth_protocol field. Enable them for eth_protocol field by using bpf_ctx_range instead of offsetof. Ditto for ip_protocol, bind_inany and len, since they already allow narrowing, and the same problem can arise when working with them. 
Fixes: 2dbb9b9e6df6 ("bpf: Introduce BPF_PROG_TYPE_SK_REUSEPORT") Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann --- net/core/filter.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 7878f918b8c0..4c6a252d4212 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8757,13 +8757,13 @@ sk_reuseport_is_valid_access(int off, int size, return size == size_default; /* Fields that allow narrowing */ - case offsetof(struct sk_reuseport_md, eth_protocol): + case bpf_ctx_range(struct sk_reuseport_md, eth_protocol): if (size < FIELD_SIZEOF(struct sk_buff, protocol)) return false; /* fall through */ - case offsetof(struct sk_reuseport_md, ip_protocol): - case offsetof(struct sk_reuseport_md, bind_inany): - case offsetof(struct sk_reuseport_md, len): + case bpf_ctx_range(struct sk_reuseport_md, ip_protocol): + case bpf_ctx_range(struct sk_reuseport_md, bind_inany): + case bpf_ctx_range(struct sk_reuseport_md, len): bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); -- cgit v1.2.1 From 7837951a12fdaf88d2c51ff0757980c00072790c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 24 Aug 2019 15:07:07 +1000 Subject: drm/mediatek: include dma-mapping header Although it builds fine here in my arm cross compile, it seems either via some other patches in -next or some Kconfig combination, this fails to build for everyone. Include linux/dma-mapping.h should fix it. 
Signed-off-by: Dave Airlie --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 7f5408cb2377..945bc20f1d33 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "mtk_drm_crtc.h" #include "mtk_drm_ddp.h" -- cgit v1.2.1 From 12c6bc38f99bb168b7f16bdb5e855a51a23ee9ec Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Wed, 21 Aug 2019 17:16:10 -0700 Subject: openvswitch: Fix log message in ovs conntrack Fixes: 06bd2bdf19d2 ("openvswitch: Add timeout support to ct action") Signed-off-by: Yi-Hung Wei Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 848c6eb55064..a1852e035ebb 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1565,7 +1565,7 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, case OVS_CT_ATTR_TIMEOUT: memcpy(info->timeout, nla_data(a), nla_len(a)); if (!memchr(info->timeout, '\0', nla_len(a))) { - OVS_NLERR(log, "Invalid conntrack helper"); + OVS_NLERR(log, "Invalid conntrack timeout"); return -EINVAL; } break; -- cgit v1.2.1 From e2c693934194fd3b4e795635934883354c06ebc9 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 22 Aug 2019 22:19:48 +0800 Subject: ipv4/icmp: fix rt dst dev null pointer dereference In __icmp_send() there is a possibility that the rt->dst.dev is NULL, e,g, with tunnel collect_md mode, which will cause kernel crash. 
Here is what the code path looks like, for GRE: - ip6gre_tunnel_xmit - ip6gre_xmit_ipv4 - __gre6_xmit - ip6_tnl_xmit - if skb->len - t->tun_hlen - eth_hlen > mtu; return -EMSGSIZE - icmp_send - net = dev_net(rt->dst.dev); <-- here The reason is __metadata_dst_init() initializes dst->dev to NULL by default. We could not fix it in __metadata_dst_init() as there is no dev supplied. On the other hand, the reason we need rt->dst.dev is to get the net. So we can just try to get it from skb->dev when rt->dst.dev is NULL. v4: Julian Anastasov reminded that skb->dev could also be NULL. We'd better still use dst.dev and do a check to avoid crash. v3: No changes. v2: fix the issue in __icmp_send() instead of updating shared dst dev in {ip_md, ip6}_tunnel_xmit. Fixes: c8b34e680a09 ("ip_tunnel: Add tnl_update_pmtu in ip_md_tunnel_xmit") Signed-off-by: Hangbin Liu Reviewed-by: Julian Anastasov Acked-by: Jonathan Lemon Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index bf7b5d45de99..4298aae74e0e 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -582,7 +582,13 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, if (!rt) goto out; - net = dev_net(rt->dst.dev); + + if (rt->dst.dev) + net = dev_net(rt->dst.dev); + else if (skb_in->dev) + net = dev_net(skb_in->dev); + else + goto out; /* * Find the original header. It is expected to be valid, of course. -- cgit v1.2.1 From c3b4c3a47e05d5fecf7354d75824a9d1b37f3e84 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 22 Aug 2019 22:19:49 +0800 Subject: xfrm/xfrm_policy: fix dst dev null pointer dereference in collect_md mode In decode_session{4,6} there is a possibility that the skb dst dev is NULL, e.g., with tunnel collect_md mode, which will cause kernel crash.
Here is what the code path looks like, for GRE: - ip6gre_tunnel_xmit - ip6gre_xmit_ipv6 - __gre6_xmit - ip6_tnl_xmit - if skb->len - t->tun_hlen - eth_hlen > mtu; return -EMSGSIZE - icmpv6_send - icmpv6_route_lookup - xfrm_decode_session_reverse - decode_session4 - oif = skb_dst(skb)->dev->ifindex; <-- here - decode_session6 - oif = skb_dst(skb)->dev->ifindex; <-- here The reason is __metadata_dst_init() init dst->dev to NULL by default. We could not fix it in __metadata_dst_init() as there is no dev supplied. On the other hand, the skb_dst(skb)->dev is actually not needed as we called decode_session{4,6} via xfrm_decode_session_reverse(), so oif is not used by: fl4->flowi4_oif = reverse ? skb->skb_iif : oif; So make a dst dev check here should be clean and safe. v4: No changes. v3: No changes. v2: fix the issue in decode_session{4,6} instead of updating shared dst dev in {ip_md, ip6}_tunnel_xmit. Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels") Signed-off-by: Hangbin Liu Tested-by: Jonathan Lemon Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8ca637a72697..ec94f5795ea4 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3269,7 +3269,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) struct flowi4 *fl4 = &fl->u.ip4; int oif = 0; - if (skb_dst(skb)) + if (skb_dst(skb) && skb_dst(skb)->dev) oif = skb_dst(skb)->dev->ifindex; memset(fl4, 0, sizeof(struct flowi4)); @@ -3387,7 +3387,7 @@ decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse) nexthdr = nh[nhoff]; - if (skb_dst(skb)) + if (skb_dst(skb) && skb_dst(skb)->dev) oif = skb_dst(skb)->dev->ifindex; memset(fl6, 0, sizeof(struct flowi6)); -- cgit v1.2.1 From 0c69b19f92dfcc0962bbc09741677f658bc55452 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 25 Aug 2019 00:34:54 +0200 Subject: MAINTAINERS: Add phylink keyword to SFF/SFP/SFP+ MODULE SUPPORT Russell King maintains phylink, as part of the SFP module support. However, much of the review work is about drivers swapping from phylib to phylink. Such changes don't make changes to the phylink core, and so the F: rules in MAINTAINERS don't match. Add a K:, keyword rule, which hopefully get_maintainer.pl will match against for patches to MAC drivers swapping to phylink. Signed-off-by: Andrew Lunn Signed-off-by: David S.
Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index a744851db1df..37a0e297cf28 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14472,6 +14472,7 @@ F: drivers/net/phy/phylink.c F: drivers/net/phy/sfp* F: include/linux/phylink.h F: include/linux/sfp.h +K: phylink SGI GRU DRIVER M: Dimitri Sivanich -- cgit v1.2.1 From b45ce32135d1c82a5bf12aa56957c3fd27956057 Mon Sep 17 00:00:00 2001 From: zhanglin Date: Fri, 23 Aug 2019 09:14:11 +0800 Subject: sock: fix potential memory leak in proto_register() If protocols registered exceeded PROTO_INUSE_NR, prot will be added to proto_list, but no available bit left for prot in proto_inuse_idx. Changes since v2: * Propagate the error code properly Signed-off-by: zhanglin Signed-off-by: David S. Miller --- net/core/sock.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 6d08553f885c..545fac19a711 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3287,16 +3287,17 @@ static __init int net_inuse_init(void) core_initcall(net_inuse_init); -static void assign_proto_idx(struct proto *prot) +static int assign_proto_idx(struct proto *prot) { prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR); if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) { pr_err("PROTO_INUSE_NR exhausted\n"); - return; + return -ENOSPC; } set_bit(prot->inuse_idx, proto_inuse_idx); + return 0; } static void release_proto_idx(struct proto *prot) @@ -3305,8 +3306,9 @@ static void release_proto_idx(struct proto *prot) clear_bit(prot->inuse_idx, proto_inuse_idx); } #else -static inline void assign_proto_idx(struct proto *prot) +static inline int assign_proto_idx(struct proto *prot) { + return 0; } static inline void release_proto_idx(struct proto *prot) @@ -3355,6 +3357,8 @@ static int req_prot_init(const struct proto *prot) int proto_register(struct proto *prot, int alloc_slab) { + int ret = -ENOBUFS; + if 
(alloc_slab) { prot->slab = kmem_cache_create_usercopy(prot->name, prot->obj_size, 0, @@ -3391,20 +3395,27 @@ int proto_register(struct proto *prot, int alloc_slab) } mutex_lock(&proto_list_mutex); + ret = assign_proto_idx(prot); + if (ret) { + mutex_unlock(&proto_list_mutex); + goto out_free_timewait_sock_slab_name; + } list_add(&prot->node, &proto_list); - assign_proto_idx(prot); mutex_unlock(&proto_list_mutex); - return 0; + return ret; out_free_timewait_sock_slab_name: - kfree(prot->twsk_prot->twsk_slab_name); + if (alloc_slab && prot->twsk_prot) + kfree(prot->twsk_prot->twsk_slab_name); out_free_request_sock_slab: - req_prot_cleanup(prot->rsk_prot); + if (alloc_slab) { + req_prot_cleanup(prot->rsk_prot); - kmem_cache_destroy(prot->slab); - prot->slab = NULL; + kmem_cache_destroy(prot->slab); + prot->slab = NULL; + } out: - return -ENOBUFS; + return ret; } EXPORT_SYMBOL(proto_register); -- cgit v1.2.1 From 292a50e3fc2cf699587ea282e6253e0d6ae3cdc1 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 23 Aug 2019 11:29:23 +0200 Subject: s390/qeth: reject oversized SNMP requests Commit d4c08afafa04 ("s390/qeth: streamline SNMP cmd code") removed the bounds checking for req_len, under the assumption that the check in qeth_alloc_cmd() would suffice. But that code path isn't sufficiently robust to handle a user-provided data_length, which could overflow (when adding the cmd header overhead) before being checked against QETH_BUFSIZE. We end up allocating just a tiny iob, and the subsequent copy_from_user() writes past the end of that iob. Special-case this path and add a coarse bounds check, to protect against malicious requests. This lets the subsequent code flow do its normal job and precise checking, without risk of overflow. Fixes: d4c08afafa04 ("s390/qeth: streamline SNMP cmd code") Reported-by: Dan Carpenter Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S.
Miller --- drivers/s390/net/qeth_core_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 9c3310c4d61d..6502b148541e 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4374,6 +4374,10 @@ static int qeth_snmp_command(struct qeth_card *card, char __user *udata) get_user(req_len, &ureq->hdr.req_len)) return -EFAULT; + /* Sanitize user input, to avoid overflows in iob size calculation: */ + if (req_len > QETH_BUFSIZE) + return -EINVAL; + iob = qeth_get_adapter_cmd(card, IPA_SETADP_SET_SNMP_CONTROL, req_len); if (!iob) return -ENOMEM; -- cgit v1.2.1 From e93fb3e9521abffadb8f965c591a290cdd92b56c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 23 Aug 2019 17:11:38 -0700 Subject: net: route dump netlink NLM_F_MULTI flag missing An excerpt from netlink(7) man page, In multipart messages (multiple nlmsghdr headers with associated payload in one byte stream) the first and all following headers have the NLM_F_MULTI flag set, except for the last header which has the type NLMSG_DONE. but, after (ee28906) there is a missing NLM_F_MULTI flag in the middle of a FIB dump. The result is user space applications following above man page excerpt may get confused and may stop parsing msg believing something went wrong. In the golang netlink lib [0] the library logic stops parsing believing the message is not a multipart message. Found this running Cilium[1] against net-next while adding a feature to auto-detect routes. I noticed with multiple route tables we no longer could detect the default routes on net tree kernels because the library logic was not returning them. Fix this by handling the fib_dump_info_fnhe() case the same way the fib_dump_info() handles it by passing the flags argument through the call chain and adding a flags argument to rt_fill_info(). Tested with Cilium stack and auto-detection of routes works again. 
Also annotated libs to dump netlink msgs and inspected NLM_F_MULTI and NLMSG_DONE flags look correct after this. Note: In inet_rtm_getroute() pass rt_fill_info() '0' for flags the same as is done for fib_dump_info() so this looks correct to me. [0] https://github.com/vishvananda/netlink/ [1] https://github.com/cilium/ Fixes: ee28906fd7a14 ("ipv4: Dump route exceptions if requested") Signed-off-by: John Fastabend Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller --- include/net/route.h | 2 +- net/ipv4/fib_trie.c | 2 +- net/ipv4/route.c | 17 ++++++++++------- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index 630a0493f1f3..dfce19c9fa96 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -233,7 +233,7 @@ void rt_del_uncached_list(struct rtable *rt); int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb, u32 table_id, struct fib_info *fi, - int *fa_index, int fa_start); + int *fa_index, int fa_start, unsigned int flags); static inline void ip_rt_put(struct rtable *rt) { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 2b2b3d291ab0..1ab2fb6bb37d 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2145,7 +2145,7 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, if (filter->dump_exceptions) { err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi, - &i_fa, s_fa); + &i_fa, s_fa, flags); if (err < 0) goto stop; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 517300d587a7..b6a6f18c3dd1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2728,7 +2728,8 @@ EXPORT_SYMBOL_GPL(ip_route_output_flow); /* called with rcu_read_lock held */ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, struct rtable *rt, u32 table_id, struct flowi4 *fl4, - struct sk_buff *skb, u32 portid, u32 seq) + struct sk_buff *skb, u32 portid, u32 seq, + unsigned int flags) { struct rtmsg *r; struct nlmsghdr *nlh; @@ -2736,7 +2737,7 @@ static 
int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 error; u32 metrics[RTAX_MAX]; - nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0); + nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), flags); if (!nlh) return -EMSGSIZE; @@ -2860,7 +2861,7 @@ nla_put_failure: static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, u32 table_id, struct fnhe_hash_bucket *bucket, int genid, - int *fa_index, int fa_start) + int *fa_index, int fa_start, unsigned int flags) { int i; @@ -2891,7 +2892,7 @@ static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb, err = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt, table_id, NULL, skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq); + cb->nlh->nlmsg_seq, flags); if (err) return err; next: @@ -2904,7 +2905,7 @@ next: int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb, u32 table_id, struct fib_info *fi, - int *fa_index, int fa_start) + int *fa_index, int fa_start, unsigned int flags) { struct net *net = sock_net(cb->skb->sk); int nhsel, genid = fnhe_genid(net); @@ -2922,7 +2923,8 @@ int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb, err = 0; if (bucket) err = fnhe_dump_bucket(net, skb, cb, table_id, bucket, - genid, fa_index, fa_start); + genid, fa_index, fa_start, + flags); rcu_read_unlock(); if (err) return err; @@ -3183,7 +3185,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fl4.flowi4_tos, res.fi, 0); } else { err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb, - NETLINK_CB(in_skb).portid, nlh->nlmsg_seq); + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0); } if (err < 0) goto errout_rcu; -- cgit v1.2.1 From e0e6d062822529dbe9be21939359b0d1e065bb0f Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Fri, 23 Aug 2019 21:04:16 -0400 Subject: net: rds: add service level support in rds-info >From IB specific 7.6.5 SERVICE LEVEL, Service Level (SL) is used to identify different flows 
within an IBA subnet. It is carried in the local route header of the packet. Before this commit, run "rds-info -I". The outputs are as below: " RDS IB Connections: LocalAddr RemoteAddr Tos SL LocalDev RemoteDev 192.2.95.3 192.2.95.1 2 0 fe80::21:28:1a:39 fe80::21:28:10:b9 192.2.95.3 192.2.95.1 1 0 fe80::21:28:1a:39 fe80::21:28:10:b9 192.2.95.3 192.2.95.1 0 0 fe80::21:28:1a:39 fe80::21:28:10:b9 " After this commit, the output is as below: " RDS IB Connections: LocalAddr RemoteAddr Tos SL LocalDev RemoteDev 192.2.95.3 192.2.95.1 2 2 fe80::21:28:1a:39 fe80::21:28:10:b9 192.2.95.3 192.2.95.1 1 1 fe80::21:28:1a:39 fe80::21:28:10:b9 192.2.95.3 192.2.95.1 0 0 fe80::21:28:1a:39 fe80::21:28:10:b9 " The commit fe3475af3bdf ("net: rds: add per rds connection cache statistics") adds cache_allocs in struct rds_info_rdma_connection as below: struct rds_info_rdma_connection { ... __u32 rdma_mr_max; __u32 rdma_mr_size; __u8 tos; __u32 cache_allocs; }; The peer struct in rds-tools of struct rds_info_rdma_connection is as below: struct rds_info_rdma_connection { ... uint32_t rdma_mr_max; uint32_t rdma_mr_size; uint8_t tos; uint8_t sl; uint32_t cache_allocs; }; The difference between userspace and kernel is the member variable sl. In the kernel struct, the member variable sl is missing. This will introduce risks. So it is necessary to use this commit to avoid this risk. Fixes: fe3475af3bdf ("net: rds: add per rds connection cache statistics") CC: Joe Jin CC: JUNXIAO_BI Suggested-by: Gerd Rausch Signed-off-by: Zhu Yanjun Acked-by: Santosh Shilimkar Signed-off-by: David S. 
Miller --- include/uapi/linux/rds.h | 2 ++ net/rds/ib.c | 16 ++++++++++------ net/rds/ib.h | 1 + net/rds/ib_cm.c | 3 +++ net/rds/rdma_transport.c | 10 ++++++++-- 5 files changed, 24 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h index fd6b5f66e2c5..cba368e55863 100644 --- a/include/uapi/linux/rds.h +++ b/include/uapi/linux/rds.h @@ -250,6 +250,7 @@ struct rds_info_rdma_connection { __u32 rdma_mr_max; __u32 rdma_mr_size; __u8 tos; + __u8 sl; __u32 cache_allocs; }; @@ -265,6 +266,7 @@ struct rds6_info_rdma_connection { __u32 rdma_mr_max; __u32 rdma_mr_size; __u8 tos; + __u8 sl; __u32 cache_allocs; }; diff --git a/net/rds/ib.c b/net/rds/ib.c index ec05d91aa9a2..45acab2de0cf 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -291,7 +291,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, void *buffer) { struct rds_info_rdma_connection *iinfo = buffer; - struct rds_ib_connection *ic; + struct rds_ib_connection *ic = conn->c_transport_data; /* We will only ever look at IB transports */ if (conn->c_trans != &rds_ib_transport) @@ -301,15 +301,16 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, iinfo->src_addr = conn->c_laddr.s6_addr32[3]; iinfo->dst_addr = conn->c_faddr.s6_addr32[3]; - iinfo->tos = conn->c_tos; + if (ic) { + iinfo->tos = conn->c_tos; + iinfo->sl = ic->i_sl; + } memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid)); memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); if (rds_conn_state(conn) == RDS_CONN_UP) { struct rds_ib_device *rds_ibdev; - ic = conn->c_transport_data; - rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid, (union ib_gid *)&iinfo->dst_gid); @@ -329,7 +330,7 @@ static int rds6_ib_conn_info_visitor(struct rds_connection *conn, void *buffer) { struct rds6_info_rdma_connection *iinfo6 = buffer; - struct rds_ib_connection *ic; + struct rds_ib_connection *ic = conn->c_transport_data; /* We will only ever look at IB transports */ if (conn->c_trans != 
&rds_ib_transport) @@ -337,6 +338,10 @@ static int rds6_ib_conn_info_visitor(struct rds_connection *conn, iinfo6->src_addr = conn->c_laddr; iinfo6->dst_addr = conn->c_faddr; + if (ic) { + iinfo6->tos = conn->c_tos; + iinfo6->sl = ic->i_sl; + } memset(&iinfo6->src_gid, 0, sizeof(iinfo6->src_gid)); memset(&iinfo6->dst_gid, 0, sizeof(iinfo6->dst_gid)); @@ -344,7 +349,6 @@ static int rds6_ib_conn_info_visitor(struct rds_connection *conn, if (rds_conn_state(conn) == RDS_CONN_UP) { struct rds_ib_device *rds_ibdev; - ic = conn->c_transport_data; rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo6->src_gid, (union ib_gid *)&iinfo6->dst_gid); rds_ibdev = ic->rds_ibdev; diff --git a/net/rds/ib.h b/net/rds/ib.h index 303c6ee8bdb7..f2b558e8b5ea 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -220,6 +220,7 @@ struct rds_ib_connection { /* Send/Recv vectors */ int i_scq_vector; int i_rcq_vector; + u8 i_sl; }; /* This assumes that atomic_t is at least 32 bits */ diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index fddaa09f7b0d..233f1368162b 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -152,6 +152,9 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even RDS_PROTOCOL_MINOR(conn->c_version), ic->i_flowctl ? ", flow control" : ""); + /* receive sl from the peer */ + ic->i_sl = ic->i_cm_id->route.path_rec->sl; + atomic_set(&ic->i_cq_quiesce, 0); /* Init rings and fill recv. this needs to wait until protocol diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 9986d6065c4d..5f741e51b4ba 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -43,6 +43,9 @@ static struct rdma_cm_id *rds_rdma_listen_id; static struct rdma_cm_id *rds6_rdma_listen_id; #endif +/* Per IB specification 7.7.3, service level is a 4-bit field. 
*/ +#define TOS_TO_SL(tos) ((tos) & 0xF) + static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, struct rdma_cm_event *event, bool isv6) @@ -97,10 +100,13 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, struct rds_ib_connection *ibic; ibic = conn->c_transport_data; - if (ibic && ibic->i_cm_id == cm_id) + if (ibic && ibic->i_cm_id == cm_id) { + cm_id->route.path_rec[0].sl = + TOS_TO_SL(conn->c_tos); ret = trans->cm_initiate_connect(cm_id, isv6); - else + } else { rds_conn_drop(conn); + } } break; -- cgit v1.2.1 From d776aaa9895eb6eb770908e899cb7f5bd5025b3c Mon Sep 17 00:00:00 2001 From: Henry Burns Date: Sat, 24 Aug 2019 17:54:37 -0700 Subject: mm/z3fold.c: fix race between migration and destruction In z3fold_destroy_pool() we call destroy_workqueue(&pool->compact_wq). However, we have no guarantee that migration isn't happening in the background at that time. Migration directly calls queue_work_on(pool->compact_wq), if destruction wins that race we are using a destroyed workqueue. 
Link: http://lkml.kernel.org/r/20190809213828.202833-1-henryburns@google.com Signed-off-by: Henry Burns Cc: Vitaly Wool Cc: Shakeel Butt Cc: Jonathan Adams Cc: Henry Burns Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/z3fold.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/mm/z3fold.c b/mm/z3fold.c index ed19d98c9dcd..e31cd9bd4ed5 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -145,6 +146,8 @@ struct z3fold_header { * @release_wq: workqueue for safe page release * @work: work_struct for safe page release * @inode: inode for z3fold pseudo filesystem + * @destroying: bool to stop migration once we start destruction + * @isolated: int to count the number of pages currently in isolation * * This structure is allocated at pool creation time and maintains metadata * pertaining to a particular z3fold pool. @@ -163,8 +166,11 @@ struct z3fold_pool { const struct zpool_ops *zpool_ops; struct workqueue_struct *compact_wq; struct workqueue_struct *release_wq; + struct wait_queue_head isolate_wait; struct work_struct work; struct inode *inode; + bool destroying; + int isolated; }; /* @@ -769,6 +775,7 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp, goto out_c; spin_lock_init(&pool->lock); spin_lock_init(&pool->stale_lock); + init_waitqueue_head(&pool->isolate_wait); pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2); if (!pool->unbuddied) goto out_pool; @@ -808,6 +815,15 @@ out: return NULL; } +static bool pool_isolated_are_drained(struct z3fold_pool *pool) +{ + bool ret; + + spin_lock(&pool->lock); + ret = pool->isolated == 0; + spin_unlock(&pool->lock); + return ret; +} /** * z3fold_destroy_pool() - destroys an existing z3fold pool * @pool: the z3fold pool to be destroyed @@ -817,6 +833,22 @@ out: static void z3fold_destroy_pool(struct z3fold_pool *pool) { 
kmem_cache_destroy(pool->c_handle); + /* + * We set pool-> destroying under lock to ensure that + * z3fold_page_isolate() sees any changes to destroying. This way we + * avoid the need for any memory barriers. + */ + + spin_lock(&pool->lock); + pool->destroying = true; + spin_unlock(&pool->lock); + + /* + * We need to ensure that no pages are being migrated while we destroy + * these workqueues, as migration can queue work on either of the + * workqueues. + */ + wait_event(pool->isolate_wait, !pool_isolated_are_drained(pool)); /* * We need to destroy pool->compact_wq before pool->release_wq, @@ -1307,6 +1339,28 @@ static u64 z3fold_get_pool_size(struct z3fold_pool *pool) return atomic64_read(&pool->pages_nr); } +/* + * z3fold_dec_isolated() expects to be called while pool->lock is held. + */ +static void z3fold_dec_isolated(struct z3fold_pool *pool) +{ + assert_spin_locked(&pool->lock); + VM_BUG_ON(pool->isolated <= 0); + pool->isolated--; + + /* + * If we have no more isolated pages, we have to see if + * z3fold_destroy_pool() is waiting for a signal. + */ + if (pool->isolated == 0 && waitqueue_active(&pool->isolate_wait)) + wake_up_all(&pool->isolate_wait); +} + +static void z3fold_inc_isolated(struct z3fold_pool *pool) +{ + pool->isolated++; +} + static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode) { struct z3fold_header *zhdr; @@ -1333,6 +1387,33 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode) spin_lock(&pool->lock); if (!list_empty(&page->lru)) list_del(&page->lru); + /* + * We need to check for destruction while holding pool->lock, as + * otherwise destruction could see 0 isolated pages, and + * proceed. + */ + if (unlikely(pool->destroying)) { + spin_unlock(&pool->lock); + /* + * If this page isn't stale, somebody else holds a + * reference to it. Let't drop our refcount so that they + * can call the release logic. 
+ */ + if (unlikely(kref_put(&zhdr->refcount, + release_z3fold_page_locked))) { + /* + * If we get here we have kref problems, so we + * should freak out. + */ + WARN(1, "Z3fold is experiencing kref problems\n"); + return false; + } + z3fold_page_unlock(zhdr); + return false; + } + + + z3fold_inc_isolated(pool); spin_unlock(&pool->lock); z3fold_page_unlock(zhdr); return true; @@ -1401,6 +1482,10 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work); + spin_lock(&pool->lock); + z3fold_dec_isolated(pool); + spin_unlock(&pool->lock); + page_mapcount_reset(page); put_page(page); return 0; @@ -1420,10 +1505,14 @@ static void z3fold_page_putback(struct page *page) INIT_LIST_HEAD(&page->lru); if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) { atomic64_dec(&pool->pages_nr); + spin_lock(&pool->lock); + z3fold_dec_isolated(pool); + spin_unlock(&pool->lock); return; } spin_lock(&pool->lock); list_add(&page->lru, &pool->lru); + z3fold_dec_isolated(pool); spin_unlock(&pool->lock); z3fold_page_unlock(zhdr); } -- cgit v1.2.1 From cd961038381f392b364a7c4a040f4576ca415b1a Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Sat, 24 Aug 2019 17:54:40 -0700 Subject: mm, page_alloc: move_freepages should not examine struct page of reserved memory After commit 907ec5fca3dc ("mm: zero remaining unavailable struct pages"), struct page of reserved memory is zeroed. This causes page->flags to be 0 and fixes issues related to reading /proc/kpageflags, for example, of reserved memory. The VM_BUG_ON() in move_freepages_block(), however, assumes that page_zone() is meaningful even for reserved memory. That assumption is no longer true after the aforementioned commit. There's no reason why move_freepages_block() should be testing the legitimacy of page_zone() for reserved memory; its scope is limited only to pages on the zone's freelist. 
Note that pfn_valid() can be true for reserved memory: there is a backing struct page. The check for page_to_nid(page) is also buggy but reserved memory normally only appears on node 0 so the zeroing doesn't affect this. Move the debug checks to after verifying PageBuddy is true. This isolates the scope of the checks to only be for buddy pages which are on the zone's freelist which move_freepages_block() is operating on. In this case, an incorrect node or zone is a bug worthy of being warned about (and the examination of struct page is acceptable because this memory is not reserved). Why does move_freepages_block() get called on reserved memory? It's simply math after finding a valid free page from the per-zone free area to use as fallback. We find the beginning and end of the pageblock of the valid page and that can bring us into memory that was reserved per the e820. pfn_valid() is still true (it's backed by a struct page), but since it's zero'd we shouldn't make any inferences here about comparing its node or zone. The current node check just happens to succeed most of the time by luck because reserved memory typically appears on node 0. The fix here is to validate that we actually have buddy pages before testing if there's any type of zone or node strangeness going on. We noticed it almost immediately after bringing 907ec5fca3dc in on CONFIG_DEBUG_VM builds. It depends on finding specific free pages in the per-zone free area where the math in move_freepages() will bring the start or end pfn into reserved memory and wanting to claim that entire pageblock as a new migratetype. So the path will be rare, require CONFIG_DEBUG_VM, and require fallback to a different migratetype. Some struct pages were already zeroed from reserve pages before 907ec5fca3dc so it theoretically could trigger before this commit. I think it's rare enough under a config option that most people don't run that others may not have noticed.
I wouldn't argue against a stable tag and the backport should be easy enough, but probably wouldn't single out a commit that this is fixing. Mel said: : The overhead of the debugging check is higher with this patch although : it'll only affect debug builds and the path is not particularly hot. : If this was a concern, I think it would be reasonable to simply remove : the debugging check as the zone boundaries are checked in : move_freepages_block and we never expect a zone/node to be smaller than : a pageblock and stuck in the middle of another zone. Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1908122036560.10779@chino.kir.corp.google.com Signed-off-by: David Rientjes Acked-by: Mel Gorman Cc: Naoya Horiguchi Cc: Masayoshi Mizuma Cc: Oscar Salvador Cc: Pavel Tatashin Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 272c6de1bf4e..9c9194959271 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2238,27 +2238,12 @@ static int move_freepages(struct zone *zone, unsigned int order; int pages_moved = 0; -#ifndef CONFIG_HOLES_IN_ZONE - /* - * page_zone is not safe to call in this context when - * CONFIG_HOLES_IN_ZONE is set. This bug check is probably redundant - * anyway as we check zone boundaries in move_freepages_block(). 
- * Remove at a later date when no bug reports exist related to - * grouping pages by mobility - */ - VM_BUG_ON(pfn_valid(page_to_pfn(start_page)) && - pfn_valid(page_to_pfn(end_page)) && - page_zone(start_page) != page_zone(end_page)); -#endif for (page = start_page; page <= end_page;) { if (!pfn_valid_within(page_to_pfn(page))) { page++; continue; } - /* Make sure we are not inadvertently changing nodes */ - VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page); - if (!PageBuddy(page)) { /* * We assume that pages that could be isolated for @@ -2273,6 +2258,10 @@ static int move_freepages(struct zone *zone, continue; } + /* Make sure we are not inadvertently changing nodes */ + VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page); + VM_BUG_ON_PAGE(page_zone(page) != zone, page); + order = page_order(page); move_to_free_area(page, &zone->free_area[order], migratetype); page += 1 << order; -- cgit v1.2.1 From bbcb03a97ffe49169f02d34eff2ced56ddaafb4f Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Sat, 24 Aug 2019 17:54:43 -0700 Subject: parisc: fix compilation errrors Commit 0cfaee2af3a0 ("include/asm-generic/5level-fixup.h: fix variable 'p4d' set but not used") converted a few functions from macros to static inline, which causes parisc to complain, In file included from include/asm-generic/4level-fixup.h:38:0, from arch/parisc/include/asm/pgtable.h:5, from arch/parisc/include/asm/io.h:6, from include/linux/io.h:13, from sound/core/memory.c:9: include/asm-generic/5level-fixup.h:14:18: error: unknown type name 'pgd_t'; did you mean 'pid_t'? #define p4d_t pgd_t ^ include/asm-generic/5level-fixup.h:24:28: note: in expansion of macro 'p4d_t' static inline int p4d_none(p4d_t p4d) ^~~~~ It is because "4level-fixup.h" is included before "asm/page.h" where "pgd_t" is defined. 
Link: http://lkml.kernel.org/r/20190815205305.1382-1-cai@lca.pw Fixes: 0cfaee2af3a0 ("include/asm-generic/5level-fixup.h: fix variable 'p4d' set but not used") Signed-off-by: Qian Cai Reported-by: Guenter Roeck Tested-by: Guenter Roeck Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/parisc/include/asm/pgtable.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index a39b079e73f2..6d58c1739b42 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -2,6 +2,7 @@ #ifndef _PARISC_PGTABLE_H #define _PARISC_PGTABLE_H +#include #include #include @@ -98,8 +99,6 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) #endif /* !__ASSEMBLY__ */ -#include - #define pte_ERROR(e) \ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ -- cgit v1.2.1 From c350a99ea2b1b666c28948d74ab46c16913c28a7 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sat, 24 Aug 2019 17:54:47 -0700 Subject: mm: memcontrol: flush percpu vmstats before releasing memcg Percpu caching of local vmstats with the conditional propagation by the cgroup tree leads to an accumulation of errors on non-leaf levels. Let's imagine two nested memory cgroups A and A/B. Say, a process belonging to A/B allocates 100 pagecache pages on the CPU 0. The percpu cache will spill 3 times, so that 32*3=96 pages will be accounted to A/B and A atomic vmstat counters, 4 pages will remain in the percpu cache. Imagine A/B is nearby memory.max, so that every following allocation triggers a direct reclaim on the local CPU. Say, each such attempt will free 16 pages on a new cpu. That means every percpu cache will have -16 pages, except the first one, which will have 4 - 16 = -12. A/B and A atomic counters will not be touched at all. Now a user removes A/B. 
All percpu caches are freed and corresponding vmstat numbers are forgotten. A has 96 pages more than expected. As memory cgroups are created and destroyed, errors do accumulate. Even 1-2 pages differences can accumulate into large numbers. To fix this issue let's accumulate and propagate percpu vmstat values before releasing the memory cgroup. At this point these numbers are stable and cannot be changed. Since on cpu hotplug we do flush percpu vmstats anyway, we can iterate only over online cpus. Link: http://lkml.kernel.org/r/20190819202338.363363-2-guro@fb.com Fixes: 42a300353577 ("mm: memcontrol: fix recursive statistics correctness & scalabilty") Signed-off-by: Roman Gushchin Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6f5c0c517c49..1a32e32e7ac3 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3260,6 +3260,41 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, } } +static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg) +{ + unsigned long stat[MEMCG_NR_STAT]; + struct mem_cgroup *mi; + int node, cpu, i; + + for (i = 0; i < MEMCG_NR_STAT; i++) + stat[i] = 0; + + for_each_online_cpu(cpu) + for (i = 0; i < MEMCG_NR_STAT; i++) + stat[i] += raw_cpu_read(memcg->vmstats_percpu->stat[i]); + + for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) + for (i = 0; i < MEMCG_NR_STAT; i++) + atomic_long_add(stat[i], &mi->vmstats[i]); + + for_each_node(node) { + struct mem_cgroup_per_node *pn = memcg->nodeinfo[node]; + struct mem_cgroup_per_node *pi; + + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) + stat[i] = 0; + + for_each_online_cpu(cpu) + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) + stat[i] += raw_cpu_read( + pn->lruvec_stat_cpu->count[i]); + + for (pi = pn; pi; pi = parent_nodeinfo(pi, node)) + for (i = 0; i < 
NR_VM_NODE_STAT_ITEMS; i++) + atomic_long_add(stat[i], &pi->lruvec_stat[i]); + } +} + #ifdef CONFIG_MEMCG_KMEM static int memcg_online_kmem(struct mem_cgroup *memcg) { @@ -4682,6 +4717,11 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) { int node; + /* + * Flush percpu vmstats to guarantee the value correctness + * on parent's and all ancestor levels. + */ + memcg_flush_percpu_vmstats(memcg); for_each_node(node) free_mem_cgroup_per_node_info(memcg, node); free_percpu(memcg->vmstats_percpu); -- cgit v1.2.1 From bb65f89b7d3d305c14951f49860711fbcae70692 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sat, 24 Aug 2019 17:54:50 -0700 Subject: mm: memcontrol: flush percpu vmevents before releasing memcg Similar to vmstats, percpu caching of local vmevents leads to an accumulation of errors on non-leaf levels. This happens because some leftovers may remain in percpu caches, so that they are never propagated up by the cgroup tree and just disappear into nonexistence with on releasing of the memory cgroup. To fix this issue let's accumulate and propagate percpu vmevents values before releasing the memory cgroup similar to what we're doing with vmstats. Since on cpu hotplug we do flush percpu vmstats anyway, we can iterate only over online cpus. 
Link: http://lkml.kernel.org/r/20190819202338.363363-4-guro@fb.com Fixes: 42a300353577 ("mm: memcontrol: fix recursive statistics correctness & scalabilty") Signed-off-by: Roman Gushchin Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1a32e32e7ac3..26e2999af608 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3295,6 +3295,25 @@ static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg) } } +static void memcg_flush_percpu_vmevents(struct mem_cgroup *memcg) +{ + unsigned long events[NR_VM_EVENT_ITEMS]; + struct mem_cgroup *mi; + int cpu, i; + + for (i = 0; i < NR_VM_EVENT_ITEMS; i++) + events[i] = 0; + + for_each_online_cpu(cpu) + for (i = 0; i < NR_VM_EVENT_ITEMS; i++) + events[i] += raw_cpu_read( + memcg->vmstats_percpu->events[i]); + + for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) + for (i = 0; i < NR_VM_EVENT_ITEMS; i++) + atomic_long_add(events[i], &mi->vmevents[i]); +} + #ifdef CONFIG_MEMCG_KMEM static int memcg_online_kmem(struct mem_cgroup *memcg) { @@ -4718,10 +4737,11 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) int node; /* - * Flush percpu vmstats to guarantee the value correctness + * Flush percpu vmstats and vmevents to guarantee the value correctness * on parent's and all ancestor levels. */ memcg_flush_percpu_vmstats(memcg); + memcg_flush_percpu_vmevents(memcg); for_each_node(node) free_mem_cgroup_per_node_info(memcg, node); free_percpu(memcg->vmstats_percpu); -- cgit v1.2.1 From 7b2b55da1db10a5525460633ae4b6fb0be060c41 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Sat, 24 Aug 2019 17:54:53 -0700 Subject: psi: get poll_work to run when calling poll syscall next time Only when calling the poll syscall the first time can user receive POLLPRI correctly. 
After that, user always fails to acquire the event signal. Reproduce case: 1. Get the monitor code in Documentation/accounting/psi.txt 2. Run it, and wait for the event triggered. 3. Kill and restart the process. The question is why we can end up with poll_scheduled = 1 but the work not running (which would reset it to 0). And the answer is because the scheduling side sees group->poll_kworker under RCU protection and then schedules it, but here we cancel the work and destroy the worker. The cancel needs to pair with resetting the poll_scheduled flag. Link: http://lkml.kernel.org/r/1566357985-97781-1-git-send-email-joseph.qi@linux.alibaba.com Signed-off-by: Jason Xing Signed-off-by: Joseph Qi Reviewed-by: Caspar Zhang Reviewed-by: Suren Baghdasaryan Acked-by: Johannes Weiner Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched/psi.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 23fbbcc414d5..6e52b67b420e 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1131,7 +1131,15 @@ static void psi_trigger_destroy(struct kref *ref) * deadlock while waiting for psi_poll_work to acquire trigger_lock */ if (kworker_to_destroy) { + /* + * After the RCU grace period has expired, the worker + * can no longer be found through group->poll_kworker. + * But it might have been already scheduled before + * that - deschedule it cleanly before destroying it. + */ kthread_cancel_delayed_work_sync(&group->poll_work); + atomic_set(&group->poll_scheduled, 0); + kthread_destroy_worker(kworker_to_destroy); } kfree(t); -- cgit v1.2.1 From 46d0b24c5ee10a15dfb25e20642f5a5ed59c5003 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 24 Aug 2019 17:54:56 -0700 Subject: userfaultfd_release: always remove uffd flags and clear vm_userfaultfd_ctx userfaultfd_release() should clear vm_flags/vm_userfaultfd_ctx even if mm->core_state != NULL. 
Otherwise a page fault can see userfaultfd_missing() == T and use an already freed userfaultfd_ctx. Link: http://lkml.kernel.org/r/20190820160237.GB4983@redhat.com Fixes: 04f5866e41fb ("coredump: fix race condition between mmget_not_zero()/get_task_mm() and core dumping") Signed-off-by: Oleg Nesterov Reported-by: Kefeng Wang Reviewed-by: Andrea Arcangeli Tested-by: Kefeng Wang Cc: Peter Xu Cc: Mike Rapoport Cc: Jann Horn Cc: Jason Gunthorpe Cc: Michal Hocko Cc: Tetsuo Handa Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index ccbdbd62f0d8..fe6d804a38dc 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -880,6 +880,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) /* len == 0 means wake all */ struct userfaultfd_wake_range range = { .len = 0, }; unsigned long new_flags; + bool still_valid; WRITE_ONCE(ctx->released, true); @@ -895,8 +896,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) * taking the mmap_sem for writing. 
*/ down_write(&mm->mmap_sem); - if (!mmget_still_valid(mm)) - goto skip_mm; + still_valid = mmget_still_valid(mm); prev = NULL; for (vma = mm->mmap; vma; vma = vma->vm_next) { cond_resched(); @@ -907,19 +907,20 @@ static int userfaultfd_release(struct inode *inode, struct file *file) continue; } new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP); - prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end, - new_flags, vma->anon_vma, - vma->vm_file, vma->vm_pgoff, - vma_policy(vma), - NULL_VM_UFFD_CTX); - if (prev) - vma = prev; - else - prev = vma; + if (still_valid) { + prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end, + new_flags, vma->anon_vma, + vma->vm_file, vma->vm_pgoff, + vma_policy(vma), + NULL_VM_UFFD_CTX); + if (prev) + vma = prev; + else + prev = vma; + } vma->vm_flags = new_flags; vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; } -skip_mm: up_write(&mm->mmap_sem); mmput(mm); wakeup: -- cgit v1.2.1 From f7da677bc6e72033f0981b9d58b5c5d409fa641e Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Sat, 24 Aug 2019 17:54:59 -0700 Subject: mm, page_owner: handle THP splits correctly THP splitting path is missing the split_page_owner() call that split_page() has. As a result, split THP pages are wrongly reported in the page_owner file as order-9 pages. Furthermore when the former head page is freed, the remaining former tail pages are not listed in the page_owner file at all. This patch fixes that by adding the split_page_owner() call into __split_huge_page(). Link: http://lkml.kernel.org/r/20190820131828.22684-2-vbabka@suse.cz Fixes: a9627bc5e34e ("mm/page_owner: introduce split_page_owner and replace manual handling") Reported-by: Kirill A. 
Shutemov Signed-off-by: Vlastimil Babka Cc: Michal Hocko Cc: Mel Gorman Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 738065f765ab..de1f15969e27 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -2516,6 +2517,9 @@ static void __split_huge_page(struct page *page, struct list_head *list, } ClearPageCompound(head); + + split_page_owner(head, HPAGE_PMD_ORDER); + /* See comment in __split_huge_page_tail() */ if (PageAnon(head)) { /* Additional pin to swap cache */ -- cgit v1.2.1 From 1a87aa03597efa9641e92875b883c94c7f872ccb Mon Sep 17 00:00:00 2001 From: Henry Burns Date: Sat, 24 Aug 2019 17:55:03 -0700 Subject: mm/zsmalloc.c: migration can leave pages in ZS_EMPTY indefinitely In zs_page_migrate() we call putback_zspage() after we have finished migrating all pages in this zspage. However, the return value is ignored. If a zs_free() races in between zs_page_isolate() and zs_page_migrate(), freeing the last object in the zspage, putback_zspage() will leave the page in ZS_EMPTY for potentially an unbounded amount of time. To fix this, we need to do the same thing as zs_page_putback() does: schedule free_work to occur. To avoid duplicated code, move the sequence to a new putback_zspage_deferred() function which both zs_page_migrate() and zs_page_putback() call. 
Link: http://lkml.kernel.org/r/20190809181751.219326-1-henryburns@google.com Fixes: 48b4800a1c6a ("zsmalloc: page migration support") Signed-off-by: Henry Burns Reviewed-by: Sergey Senozhatsky Cc: Henry Burns Cc: Minchan Kim Cc: Shakeel Butt Cc: Jonathan Adams Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/zsmalloc.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 57fbb7ced69f..5105b9b66653 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1862,6 +1862,18 @@ static void dec_zspage_isolation(struct zspage *zspage) zspage->isolated--; } +static void putback_zspage_deferred(struct zs_pool *pool, + struct size_class *class, + struct zspage *zspage) +{ + enum fullness_group fg; + + fg = putback_zspage(class, zspage); + if (fg == ZS_EMPTY) + schedule_work(&pool->free_work); + +} + static void replace_sub_page(struct size_class *class, struct zspage *zspage, struct page *newpage, struct page *oldpage) { @@ -2031,7 +2043,7 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage, * the list if @page is final isolated subpage in the zspage. */ if (!is_zspage_isolated(zspage)) - putback_zspage(class, zspage); + putback_zspage_deferred(pool, class, zspage); reset_page(page); put_page(page); @@ -2077,14 +2089,13 @@ static void zs_page_putback(struct page *page) spin_lock(&class->lock); dec_zspage_isolation(zspage); if (!is_zspage_isolated(zspage)) { - fg = putback_zspage(class, zspage); /* * Due to page_lock, we cannot free zspage immediately * so let's defer. 
*/ - if (fg == ZS_EMPTY) - schedule_work(&pool->free_work); + putback_zspage_deferred(pool, class, zspage); } + spin_unlock(&class->lock); } -- cgit v1.2.1 From 701d678599d0c1623aaf4139c03eea260a75b027 Mon Sep 17 00:00:00 2001 From: Henry Burns Date: Sat, 24 Aug 2019 17:55:06 -0700 Subject: mm/zsmalloc.c: fix race condition in zs_destroy_pool In zs_destroy_pool() we call flush_work(&pool->free_work). However, we have no guarantee that migration isn't happening in the background at that time. Since migration can't directly free pages, it relies on free_work being scheduled to free the pages. But there's nothing preventing an in-progress migrate from queuing the work *after* zs_unregister_migration() has called flush_work(). Which would mean pages still pointing at the inode when we free it. Since we know at destroy time all objects should be free, no new migrations can come in (since zs_page_isolate() fails for fully-free zspages). This means it is sufficient to track a "# isolated zspages" count by class, and have the destroy logic ensure all such pages have drained before proceeding. Keeping that state under the class spinlock keeps the logic straightforward. In this case a memory leak could lead to an eventual crash if compaction hits the leaked page. This crash would only occur if people are changing their zswap backend at runtime (which eventually starts destruction). 
Link: http://lkml.kernel.org/r/20190809181751.219326-2-henryburns@google.com Fixes: 48b4800a1c6a ("zsmalloc: page migration support") Signed-off-by: Henry Burns Reviewed-by: Sergey Senozhatsky Cc: Henry Burns Cc: Minchan Kim Cc: Shakeel Butt Cc: Jonathan Adams Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/zsmalloc.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 5105b9b66653..08def3a0d200 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include @@ -268,6 +269,10 @@ struct zs_pool { #ifdef CONFIG_COMPACTION struct inode *inode; struct work_struct free_work; + /* A wait queue for when migration races with async_free_zspage() */ + struct wait_queue_head migration_wait; + atomic_long_t isolated_pages; + bool destroying; #endif }; @@ -1874,6 +1879,19 @@ static void putback_zspage_deferred(struct zs_pool *pool, } +static inline void zs_pool_dec_isolated(struct zs_pool *pool) +{ + VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0); + atomic_long_dec(&pool->isolated_pages); + /* + * There's no possibility of racing, since wait_for_isolated_drain() + * checks the isolated count under &class->lock after enqueuing + * on migration_wait. 
+ */ + if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying) + wake_up_all(&pool->migration_wait); +} + static void replace_sub_page(struct size_class *class, struct zspage *zspage, struct page *newpage, struct page *oldpage) { @@ -1943,6 +1961,7 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode) */ if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) { get_zspage_mapping(zspage, &class_idx, &fullness); + atomic_long_inc(&pool->isolated_pages); remove_zspage(class, zspage, fullness); } @@ -2042,8 +2061,16 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage, * Page migration is done so let's putback isolated zspage to * the list if @page is final isolated subpage in the zspage. */ - if (!is_zspage_isolated(zspage)) + if (!is_zspage_isolated(zspage)) { + /* + * We cannot race with zs_destroy_pool() here because we wait + * for isolation to hit zero before we start destroying. + * Also, we ensure that everyone can see pool->destroying before + * we start waiting. + */ putback_zspage_deferred(pool, class, zspage); + zs_pool_dec_isolated(pool); + } reset_page(page); put_page(page); @@ -2094,8 +2121,8 @@ static void zs_page_putback(struct page *page) * so let's defer. */ putback_zspage_deferred(pool, class, zspage); + zs_pool_dec_isolated(pool); } - spin_unlock(&class->lock); } @@ -2118,8 +2145,36 @@ static int zs_register_migration(struct zs_pool *pool) return 0; } +static bool pool_isolated_are_drained(struct zs_pool *pool) +{ + return atomic_long_read(&pool->isolated_pages) == 0; +} + +/* Function for resolving migration */ +static void wait_for_isolated_drain(struct zs_pool *pool) +{ + + /* + * We're in the process of destroying the pool, so there are no + * active allocations. zs_page_isolate() fails for completely free + * zspages, so we need only wait for the zs_pool's isolated + * count to hit zero. 
+ */ + wait_event(pool->migration_wait, + pool_isolated_are_drained(pool)); +} + static void zs_unregister_migration(struct zs_pool *pool) { + pool->destroying = true; + /* + * We need a memory barrier here to ensure global visibility of + * pool->destroying. Thus pool->isolated pages will either be 0 in which + * case we don't care, or it will be > 0 and pool->destroying will + * ensure that we wake up once isolation hits 0. + */ + smp_mb(); + wait_for_isolated_drain(pool); /* This can block */ flush_work(&pool->free_work); iput(pool->inode); } @@ -2357,6 +2412,8 @@ struct zs_pool *zs_create_pool(const char *name) if (!pool->name) goto err; + init_waitqueue_head(&pool->migration_wait); + if (create_cache(pool)) goto err; -- cgit v1.2.1 From 00fb24a42a68b1ee0f6495993fe1be7124433dfb Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Sat, 24 Aug 2019 17:55:09 -0700 Subject: mm/kasan: fix false positive invalid-free reports with CONFIG_KASAN_SW_TAGS=y The code like this: ptr = kmalloc(size, GFP_KERNEL); page = virt_to_page(ptr); offset = offset_in_page(ptr); kfree(page_address(page) + offset); may produce false-positive invalid-free reports on the kernel with CONFIG_KASAN_SW_TAGS=y. In the example above we lose the original tag assigned to 'ptr', so kfree() gets the pointer with 0xFF tag. In kfree() we check that 0xFF tag is different from the tag in shadow hence print false report. Instead of just comparing tags, do the following: 1) Check that shadow doesn't contain KASAN_TAG_INVALID. Otherwise it's double-free and it doesn't matter what tag the pointer have. 2) If pointer tag is different from 0xFF, make sure that tag in the shadow is the same as in the pointer. 
Link: http://lkml.kernel.org/r/20190819172540.19581-1-aryabinin@virtuozzo.com Fixes: 7f94ffbc4c6a ("kasan: add hooks implementation for tag-based mode") Signed-off-by: Andrey Ryabinin Reported-by: Walter Wu Reported-by: Mark Rutland Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Catalin Marinas Cc: Will Deacon Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/common.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 2277b82902d8..95d16a42db6b 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -407,8 +407,14 @@ static inline bool shadow_invalid(u8 tag, s8 shadow_byte) if (IS_ENABLED(CONFIG_KASAN_GENERIC)) return shadow_byte < 0 || shadow_byte >= KASAN_SHADOW_SCALE_SIZE; - else - return tag != (u8)shadow_byte; + + /* else CONFIG_KASAN_SW_TAGS: */ + if ((u8)shadow_byte == KASAN_TAG_INVALID) + return true; + if ((tag != KASAN_TAG_KERNEL) && (tag != (u8)shadow_byte)) + return true; + + return false; } static bool __kasan_slab_free(struct kmem_cache *cache, void *object, -- cgit v1.2.1 From 75545304eba6a3d282f923b96a466dc25a81e359 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 25 Aug 2019 09:21:44 +0200 Subject: ALSA: seq: Fix potential concurrent access to the deleted pool The input pool of a client might be deleted via the resize ioctl, the the access to it should be covered by the proper locks. Currently the only missing place is the call in snd_seq_ioctl_get_client_pool(), and this patch papers over it. 
Reported-by: syzbot+4a75454b9ca2777f35c7@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 3 +-- sound/core/seq/seq_fifo.c | 17 +++++++++++++++++ sound/core/seq/seq_fifo.h | 2 ++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 7737b2670064..6d9592f0ae1d 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1835,8 +1835,7 @@ static int snd_seq_ioctl_get_client_pool(struct snd_seq_client *client, if (cptr->type == USER_CLIENT) { info->input_pool = cptr->data.user.fifo_pool_size; info->input_free = info->input_pool; - if (cptr->data.user.fifo) - info->input_free = snd_seq_unused_cells(cptr->data.user.fifo->pool); + info->input_free = snd_seq_fifo_unused_cells(cptr->data.user.fifo); } else { info->input_pool = 0; info->input_free = 0; diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c index ea69261f269a..eaaa8b5830bb 100644 --- a/sound/core/seq/seq_fifo.c +++ b/sound/core/seq/seq_fifo.c @@ -263,3 +263,20 @@ int snd_seq_fifo_resize(struct snd_seq_fifo *f, int poolsize) return 0; } + +/* get the number of unused cells safely */ +int snd_seq_fifo_unused_cells(struct snd_seq_fifo *f) +{ + unsigned long flags; + int cells; + + if (!f) + return 0; + + snd_use_lock_use(&f->use_lock); + spin_lock_irqsave(&f->lock, flags); + cells = snd_seq_unused_cells(f->pool); + spin_unlock_irqrestore(&f->lock, flags); + snd_use_lock_free(&f->use_lock); + return cells; +} diff --git a/sound/core/seq/seq_fifo.h b/sound/core/seq/seq_fifo.h index edc68743943d..b56a7b897c9c 100644 --- a/sound/core/seq/seq_fifo.h +++ b/sound/core/seq/seq_fifo.h @@ -53,5 +53,7 @@ int snd_seq_fifo_poll_wait(struct snd_seq_fifo *f, struct file *file, poll_table /* resize pool in fifo */ int snd_seq_fifo_resize(struct snd_seq_fifo *f, int poolsize); +/* get the number of unused cells safely */ +int snd_seq_fifo_unused_cells(struct snd_seq_fifo 
*f); #endif -- cgit v1.2.1 From a55aa89aab90fae7c815b0551b07be37db359d76 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 Aug 2019 12:01:23 -0700 Subject: Linux 5.3-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9fa18613566f..f125625efd60 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 3 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Bobtail Squid # *DOCUMENTATION* -- cgit v1.2.1 From 9b5f684182403f2b338f797c44eca0061c797dc8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 25 Aug 2019 07:47:30 -0700 Subject: nexthop: Fix nexthop_num_path for blackhole nexthops Donald reported this sequence: ip next add id 1 blackhole ip next add id 2 blackhole ip ro add 1.1.1.1/32 nhid 1 ip ro add 1.1.1.2/32 nhid 2 would cause a crash. Backtrace is: [ 151.302790] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 151.304043] CPU: 1 PID: 277 Comm: ip Not tainted 5.3.0-rc5+ #37 [ 151.305078] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.1-1 04/01/2014 [ 151.306526] RIP: 0010:fib_add_nexthop+0x8b/0x2aa [ 151.307343] Code: 35 f7 81 48 8d 14 01 c7 02 f1 f1 f1 f1 c7 42 04 01 f4 f4 f4 48 89 f2 48 c1 ea 03 65 48 8b 0c 25 28 00 00 00 48 89 4d d0 31 c9 <80> 3c 02 00 74 08 48 89 f7 e8 1a e8 53 ff be 08 00 00 00 4c 89 e7 [ 151.310549] RSP: 0018:ffff888116c27340 EFLAGS: 00010246 [ 151.311469] RAX: dffffc0000000000 RBX: ffff8881154ece00 RCX: 0000000000000000 [ 151.312713] RDX: 0000000000000004 RSI: 0000000000000020 RDI: ffff888115649b40 [ 151.313968] RBP: ffff888116c273d8 R08: ffffed10221e3757 R09: ffff888110f1bab8 [ 151.315212] R10: 0000000000000001 R11: ffff888110f1bab3 R12: ffff888115649b40 [ 151.316456] R13: 0000000000000020 R14: ffff888116c273b0 R15: ffff888115649b40 [ 151.317707] FS: 00007f60b4d8d800(0000) GS:ffff88811ac00000(0000) knlGS:0000000000000000 [ 151.319113] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 151.320119] 
CR2: 0000555671ffdc00 CR3: 00000001136ba005 CR4: 0000000000020ee0 [ 151.321367] Call Trace: [ 151.321820] ? fib_nexthop_info+0x635/0x635 [ 151.322572] fib_dump_info+0xaa4/0xde0 [ 151.323247] ? fib_create_info+0x2431/0x2431 [ 151.324008] ? napi_alloc_frag+0x2a/0x2a [ 151.324711] rtmsg_fib+0x2c4/0x3be [ 151.325339] fib_table_insert+0xe2f/0xeee ... fib_dump_info incorrectly has nhs = 0 for blackhole nexthops, so it believes the nexthop object is a multipath group (nhs != 1) and ends up down the nexthop_mpath_fill_node() path which is wrong for a blackhole. The blackhole check in nexthop_num_path is leftover from early days of the blackhole implementation which did not initialize the device. In the end the design was simpler (fewer special case checks) to set the device to loopback in nh_info, so the check in nexthop_num_path should have been removed. Fixes: 430a049190de ("nexthop: Add support for nexthop groups") Reported-by: Donald Sharp Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/nexthop.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 25f1f9a8419b..95f766c31c90 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -141,12 +141,6 @@ static inline unsigned int nexthop_num_path(const struct nexthop *nh) nh_grp = rcu_dereference_rtnl(nh->nh_grp); rc = nh_grp->num_nh; - } else { - const struct nh_info *nhi; - - nhi = rcu_dereference_rtnl(nh->nh_info); - if (nhi->reject_nh) - rc = 0; } return rc; -- cgit v1.2.1 From 803f3e22ae10003a83c781498c0ac34cfe3463ff Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Fri, 23 Aug 2019 20:51:43 +0300 Subject: ipv4: mpls: fix mpls_xmit for iptunnel When using mpls over gre/gre6 setup, rt->rt_gw4 address is not set, the same for rt->rt_gw_family. Therefore, when rt->rt_gw_family is checked in mpls_xmit(), neigh_xmit() call is skipped. As a result, such setup doesn't work anymore. This issue was found with LTP mpls03 tests. 
Fixes: 1550c171935d ("ipv4: Prepare rtable for IPv6 gateway") Signed-off-by: Alexey Kodanev Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/mpls/mpls_iptunnel.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index d25e91d7bdc1..44b675016393 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -133,12 +133,12 @@ static int mpls_xmit(struct sk_buff *skb) mpls_stats_inc_outucastpkts(out_dev, skb); if (rt) { - if (rt->rt_gw_family == AF_INET) - err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gw4, - skb); - else if (rt->rt_gw_family == AF_INET6) + if (rt->rt_gw_family == AF_INET6) err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt->rt_gw6, skb); + else + err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gw4, + skb); } else if (rt6) { if (ipv6_addr_v4mapped(&rt6->rt6i_gateway)) { /* 6PE (RFC 4798) */ -- cgit v1.2.1 From 7177895154e6a35179d332f4a584d396c50d0612 Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Thu, 22 Aug 2019 13:17:50 -0700 Subject: openvswitch: Fix conntrack cache with timeout This patch addresses a conntrack cache issue with timeout policy. Currently, we do not check if the timeout extension is set properly in the cached conntrack entry. Thus, after packet recirculate from conntrack action, the timeout policy is not applied properly. This patch fixes the aforementioned issue. Fixes: 06bd2bdf19d2 ("openvswitch: Add timeout support to ct action") Reported-by: kbuild test robot Signed-off-by: Yi-Hung Wei Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/openvswitch/conntrack.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index a1852e035ebb..d8da6477d6be 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -67,6 +67,7 @@ struct ovs_conntrack_info { struct md_mark mark; struct md_labels labels; char timeout[CTNL_TIMEOUT_NAME_MAX]; + struct nf_ct_timeout *nf_ct_timeout; #if IS_ENABLED(CONFIG_NF_NAT) struct nf_nat_range2 range; /* Only present for SRC NAT and DST NAT. */ #endif @@ -697,6 +698,14 @@ static bool skb_nfct_cached(struct net *net, if (help && rcu_access_pointer(help->helper) != info->helper) return false; } + if (info->nf_ct_timeout) { + struct nf_conn_timeout *timeout_ext; + + timeout_ext = nf_ct_timeout_find(ct); + if (!timeout_ext || info->nf_ct_timeout != + rcu_dereference(timeout_ext->timeout)) + return false; + } /* Force conntrack entry direction to the current packet? */ if (info->force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { /* Delete the conntrack entry if confirmed, else just release @@ -1657,6 +1666,10 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, ct_info.timeout)) pr_info_ratelimited("Failed to associated timeout " "policy `%s'\n", ct_info.timeout); + else + ct_info.nf_ct_timeout = rcu_dereference( + nf_ct_timeout_find(ct_info.ct)->timeout); + } if (helper) { -- cgit v1.2.1 From f53a7ad189594a112167efaf17ea8d0242b5ac00 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Sat, 24 Aug 2019 01:36:19 -0700 Subject: r8152: Set memory to all 0xFFs on failed reg reads get_registers() blindly copies the memory written to by the usb_control_msg() call even if the underlying urb failed. This could lead to junk register values being read by the driver, since some indirect callers of get_registers() ignore the return values. One example is: ocp_read_dword() ignores the return value of generic_ocp_read(), which calls get_registers(). 
So, emulate PCI "Master Abort" behavior by setting the buffer to all 0xFFs when usb_control_msg() fails. This patch is copied from the r8152 driver (v2.12.0) published by Realtek (www.realtek.com). Signed-off-by: Prashant Malani Acked-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0cc03a9ff545..eee0f5007ee3 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -799,8 +799,11 @@ int get_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data) ret = usb_control_msg(tp->udev, usb_rcvctrlpipe(tp->udev, 0), RTL8152_REQ_GET_REGS, RTL8152_REQT_READ, value, index, tmp, size, 500); + if (ret < 0) + memset(data, 0xff, size); + else + memcpy(data, tmp, size); - memcpy(data, tmp, size); kfree(tmp); return ret; -- cgit v1.2.1 From 3e5bedc2c258341702ddffbd7688c5e6eb01eafa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 21 Aug 2019 15:16:31 +0200 Subject: x86/apic: Fix arch_dynirq_lower_bound() bug for DT enabled machines Rahul Tanwar reported the following bug on DT systems: > 'ioapic_dynirq_base' contains the virtual IRQ base number. Presently, it is > updated to the end of hardware IRQ numbers but this is done only when IOAPIC > configuration type is IOAPIC_DOMAIN_LEGACY or IOAPIC_DOMAIN_STRICT. There is > a third type IOAPIC_DOMAIN_DYNAMIC which applies when IOAPIC configuration > comes from devicetree. > > See dtb_add_ioapic() in arch/x86/kernel/devicetree.c > > In case of IOAPIC_DOMAIN_DYNAMIC (DT/OF based system), 'ioapic_dynirq_base' > remains to zero initialized value. This means that for OF based systems, > virtual IRQ base will get set to zero. Such systems will very likely not even boot. For DT enabled machines ioapic_dynirq_base is irrelevant and not updated, so simply map the IRQ base 1:1 instead. 
Reported-by: Rahul Tanwar Tested-by: Rahul Tanwar Tested-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Cc: Alexander Shishkin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: alan@linux.intel.com Cc: bp@alien8.de Cc: cheol.yong.kim@intel.com Cc: qi-ming.wu@intel.com Cc: rahul.tanwar@intel.com Cc: rppt@linux.ibm.com Cc: tony.luck@intel.com Link: http://lkml.kernel.org/r/20190821081330.1187-1-rahul.tanwar@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index c7bb6c69f21c..d6af97fd170a 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2438,7 +2438,13 @@ unsigned int arch_dynirq_lower_bound(unsigned int from) * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use * gsi_top if ioapic_dynirq_base hasn't been initialized yet. */ - return ioapic_initialized ? ioapic_dynirq_base : gsi_top; + if (!ioapic_initialized) + return gsi_top; + /* + * For DT enabled machines ioapic_dynirq_base is irrelevant and not + * updated. So simply return @from if ioapic_dynirq_base == 0. + */ + return ioapic_dynirq_base ? : from; } #ifdef CONFIG_X86_32 -- cgit v1.2.1 From 9212ec7d8357ea630031e89d0d399c761421c83b Mon Sep 17 00:00:00 2001 From: Sebastian Mayr Date: Sun, 28 Jul 2019 17:26:17 +0200 Subject: uprobes/x86: Fix detection of 32-bit user mode 32-bit processes running on a 64-bit kernel are not always detected correctly, causing the process to crash when uretprobes are installed. The reason for the crash is that in_ia32_syscall() is used to determine the process's mode, which only works correctly when called from a syscall. In the case of uretprobes, however, the function is called from an exception and always returns 'false' on a 64-bit kernel. In consequence this leads to corruption of the process's return address.
Fix this by using user_64bit_mode() instead of in_ia32_syscall(), which is correct in any situation. [ tglx: Add a comment and the following historical info ] This should have been detected by the rename which happened in commit abfb9498ee13 ("x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()") which states in the changelog: The is_ia32_task()/is_x32_task() function names are a big misnomer: they suggests that the compat-ness of a system call is a task property, which is not true, the compatness of a system call purely depends on how it was invoked through the system call layer. ..... and then it went and blindly renamed every call site. Sadly enough this was already mentioned here: 8faaed1b9f50 ("uprobes/x86: Introduce sizeof_long(), cleanup adjust_ret_addr() and arch_uretprobe_hijack_return_addr()") where the changelog says: TODO: is_ia32_task() is not what we actually want, TS_COMPAT does not necessarily mean 32bit. Fortunately syscall-like insns can't be probed so it actually works, but it would be better to rename and use is_ia32_frame(). and goes all the way back to: 0326f5a94dde ("uprobes/core: Handle breakpoint and singlestep exceptions") Oh well. 7+ years until someone actually tried a uretprobe on a 32bit process on a 64bit kernel.... 
Fixes: 0326f5a94dde ("uprobes/core: Handle breakpoint and singlestep exceptions") Signed-off-by: Sebastian Mayr Signed-off-by: Thomas Gleixner Cc: Masami Hiramatsu Cc: Dmitry Safonov Cc: Oleg Nesterov Cc: Srikar Dronamraju Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190728152617.7308-1-me@sam.st --- arch/x86/kernel/uprobes.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index d8359ebeea70..8cd745ef8c7b 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -508,9 +508,12 @@ struct uprobe_xol_ops { void (*abort)(struct arch_uprobe *, struct pt_regs *); }; -static inline int sizeof_long(void) +static inline int sizeof_long(struct pt_regs *regs) { - return in_ia32_syscall() ? 4 : 8; + /* + * Check registers for mode as in_xxx_syscall() does not apply here. + */ + return user_64bit_mode(regs) ? 8 : 4; } static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) @@ -521,9 +524,9 @@ static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) static int emulate_push_stack(struct pt_regs *regs, unsigned long val) { - unsigned long new_sp = regs->sp - sizeof_long(); + unsigned long new_sp = regs->sp - sizeof_long(regs); - if (copy_to_user((void __user *)new_sp, &val, sizeof_long())) + if (copy_to_user((void __user *)new_sp, &val, sizeof_long(regs))) return -EFAULT; regs->sp = new_sp; @@ -556,7 +559,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs long correction = utask->vaddr - utask->xol_vaddr; regs->ip += correction; } else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) { - regs->sp += sizeof_long(); /* Pop incorrect return address */ + regs->sp += sizeof_long(regs); /* Pop incorrect return address */ if (emulate_push_stack(regs, utask->vaddr + auprobe->defparam.ilen)) return -ERESTART; } @@ -675,7 +678,7 @@ static int branch_post_xol_op(struct arch_uprobe 
*auprobe, struct pt_regs *regs) * "call" insn was executed out-of-line. Just restore ->sp and restart. * We could also restore ->ip and try to call branch_emulate_op() again. */ - regs->sp += sizeof_long(); + regs->sp += sizeof_long(regs); return -ERESTART; } @@ -1056,7 +1059,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs) { - int rasize = sizeof_long(), nleft; + int rasize = sizeof_long(regs), nleft; unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */ if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize)) -- cgit v1.2.1 From 2fd2329393658514db074abd4f7dea8da1c20f81 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 26 Aug 2019 22:55:15 +0900 Subject: ALSA: oxfw: fix to handle correct stream for PCM playback When userspace application calls ioctl(2) to configure hardware for PCM playback substream, ALSA OXFW driver handles incoming AMDTP stream. In this case, outgoing AMDTP stream should be handled. This commit fixes the bug for v5.3-rc kernel. 
Fixes: 4f380d007052 ("ALSA: oxfw: configure packet format in pcm.hw_params callback") Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- sound/firewire/oxfw/oxfw-pcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/oxfw/oxfw-pcm.c b/sound/firewire/oxfw/oxfw-pcm.c index 9ea39348cdf5..7c6d1c277d4d 100644 --- a/sound/firewire/oxfw/oxfw-pcm.c +++ b/sound/firewire/oxfw/oxfw-pcm.c @@ -248,7 +248,7 @@ static int pcm_playback_hw_params(struct snd_pcm_substream *substream, unsigned int channels = params_channels(hw_params); mutex_lock(&oxfw->mutex); - err = snd_oxfw_stream_reserve_duplex(oxfw, &oxfw->tx_stream, + err = snd_oxfw_stream_reserve_duplex(oxfw, &oxfw->rx_stream, rate, channels); if (err >= 0) ++oxfw->substreams_count; -- cgit v1.2.1 From 174ae4e96e0f54958cbe3fd3090a3cefeb63af4d Mon Sep 17 00:00:00 2001 From: Mischa Jonker Date: Wed, 24 Jul 2019 14:04:34 +0200 Subject: ARCv2: IDU-intc: Add support for edge-triggered interrupts This adds support for an optional extra interrupt cell to specify edge vs level triggered. It is backward compatible with dts files with only one cell, and will default to level-triggered in such a case. Note that I had to make a change to idu_irq_set_affinity as well, as this function was setting the interrupt type to "level" unconditionally, since this was the only type supported previously. 
Signed-off-by: Mischa Jonker Reviewed-by: Vineet Gupta Signed-off-by: Vineet Gupta --- arch/arc/kernel/mcip.c | 60 +++++++++++++++++++++++++++++++++++++++++++++----- include/soc/arc/mcip.h | 11 +++++++++ 2 files changed, 65 insertions(+), 6 deletions(-) diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 18b493dfb3a8..abf9398cc333 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -202,8 +202,8 @@ static void idu_set_dest(unsigned int cmn_irq, unsigned int cpu_mask) __mcip_cmd_data(CMD_IDU_SET_DEST, cmn_irq, cpu_mask); } -static void idu_set_mode(unsigned int cmn_irq, unsigned int lvl, - unsigned int distr) +static void idu_set_mode(unsigned int cmn_irq, bool set_lvl, unsigned int lvl, + bool set_distr, unsigned int distr) { union { unsigned int word; @@ -212,8 +212,11 @@ static void idu_set_mode(unsigned int cmn_irq, unsigned int lvl, }; } data; - data.distr = distr; - data.lvl = lvl; + data.word = __mcip_cmd_read(CMD_IDU_READ_MODE, cmn_irq); + if (set_distr) + data.distr = distr; + if (set_lvl) + data.lvl = lvl; __mcip_cmd_data(CMD_IDU_SET_MODE, cmn_irq, data.word); } @@ -240,6 +243,25 @@ static void idu_irq_unmask(struct irq_data *data) raw_spin_unlock_irqrestore(&mcip_lock, flags); } +static void idu_irq_ack(struct irq_data *data) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&mcip_lock, flags); + __mcip_cmd(CMD_IDU_ACK_CIRQ, data->hwirq); + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + +static void idu_irq_mask_ack(struct irq_data *data) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&mcip_lock, flags); + __mcip_cmd_data(CMD_IDU_SET_MASK, data->hwirq, 1); + __mcip_cmd(CMD_IDU_ACK_CIRQ, data->hwirq); + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + static int idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask, bool force) @@ -263,13 +285,36 @@ idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask, else distribution_mode = IDU_M_DISTRI_RR; - idu_set_mode(data->hwirq, 
IDU_M_TRIG_LEVEL, distribution_mode); + idu_set_mode(data->hwirq, false, 0, true, distribution_mode); raw_spin_unlock_irqrestore(&mcip_lock, flags); return IRQ_SET_MASK_OK; } +static int idu_irq_set_type(struct irq_data *data, u32 type) +{ + unsigned long flags; + + /* + * ARCv2 IDU HW does not support inverse polarity, so these are the + * only interrupt types supported. + */ + if (type & ~(IRQ_TYPE_EDGE_RISING | IRQ_TYPE_LEVEL_HIGH)) + return -EINVAL; + + raw_spin_lock_irqsave(&mcip_lock, flags); + + idu_set_mode(data->hwirq, true, + type & IRQ_TYPE_EDGE_RISING ? IDU_M_TRIG_EDGE : + IDU_M_TRIG_LEVEL, + false, 0); + + raw_spin_unlock_irqrestore(&mcip_lock, flags); + + return 0; +} + static void idu_irq_enable(struct irq_data *data) { /* @@ -289,7 +334,10 @@ static struct irq_chip idu_irq_chip = { .name = "MCIP IDU Intc", .irq_mask = idu_irq_mask, .irq_unmask = idu_irq_unmask, + .irq_ack = idu_irq_ack, + .irq_mask_ack = idu_irq_mask_ack, .irq_enable = idu_irq_enable, + .irq_set_type = idu_irq_set_type, #ifdef CONFIG_SMP .irq_set_affinity = idu_irq_set_affinity, #endif @@ -317,7 +365,7 @@ static int idu_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t } static const struct irq_domain_ops idu_irq_ops = { - .xlate = irq_domain_xlate_onecell, + .xlate = irq_domain_xlate_onetwocell, .map = idu_irq_map, }; diff --git a/include/soc/arc/mcip.h b/include/soc/arc/mcip.h index 50f49e043668..d1a93c73f006 100644 --- a/include/soc/arc/mcip.h +++ b/include/soc/arc/mcip.h @@ -46,7 +46,9 @@ struct mcip_cmd { #define CMD_IDU_ENABLE 0x71 #define CMD_IDU_DISABLE 0x72 #define CMD_IDU_SET_MODE 0x74 +#define CMD_IDU_READ_MODE 0x75 #define CMD_IDU_SET_DEST 0x76 +#define CMD_IDU_ACK_CIRQ 0x79 #define CMD_IDU_SET_MASK 0x7C #define IDU_M_TRIG_LEVEL 0x0 @@ -119,4 +121,13 @@ static inline void __mcip_cmd_data(unsigned int cmd, unsigned int param, __mcip_cmd(cmd, param); } +/* + * Read MCIP register + */ +static inline unsigned int __mcip_cmd_read(unsigned int cmd, unsigned int 
param) +{ + __mcip_cmd(cmd, param); + return read_aux_reg(ARC_REG_MCIP_READBACK); +} + #endif -- cgit v1.2.1 From 01449985e644329e1fd5c269fff07b9a539eeebf Mon Sep 17 00:00:00 2001 From: Mischa Jonker Date: Wed, 24 Jul 2019 14:04:35 +0200 Subject: dt-bindings: IDU-intc: Clean up documentation * Some lines exceeded 80 characters. * Clarified statement about AUX register interface Signed-off-by: Mischa Jonker Reviewed-by: Rob Herring Signed-off-by: Vineet Gupta --- .../bindings/interrupt-controller/snps,archs-idu-intc.txt | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.txt index 09fc02b99845..c5a1c7b4fc3f 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.txt @@ -1,7 +1,8 @@ * ARC-HS Interrupt Distribution Unit - This optional 2nd level interrupt controller can be used in SMP configurations for - dynamic IRQ routing, load balancing of common/external IRQs towards core intc. + This optional 2nd level interrupt controller can be used in SMP configurations + for dynamic IRQ routing, load balancing of common/external IRQs towards core + intc. Properties: @@ -13,8 +14,8 @@ Properties: of the particular interrupt line of IDU corresponds to the line N+24 of the core interrupt controller. - intc accessed via the special ARC AUX register interface, hence "reg" property - is not specified. + The interrupt controller is accessed via the special ARC AUX register + interface, hence "reg" property is not specified. 
Example: core_intc: core-interrupt-controller { -- cgit v1.2.1 From d85f6b93a76e74f1cbd0c14fb685cc1bc8df9044 Mon Sep 17 00:00:00 2001 From: Mischa Jonker Date: Wed, 24 Jul 2019 14:04:36 +0200 Subject: dt-bindings: IDU-intc: Add support for edge-triggered interrupts This updates the documentation for supporting an optional extra interrupt cell to specify edge vs level triggered. Signed-off-by: Mischa Jonker Reviewed-by: Rob Herring Signed-off-by: Vineet Gupta --- .../interrupt-controller/snps,archs-idu-intc.txt | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.txt index c5a1c7b4fc3f..a5c1db95b3ec 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.txt @@ -8,11 +8,20 @@ Properties: - compatible: "snps,archs-idu-intc" - interrupt-controller: This is an interrupt controller. -- #interrupt-cells: Must be <1>. - - Value of the cell specifies the "common" IRQ from peripheral to IDU. Number N - of the particular interrupt line of IDU corresponds to the line N+24 of the - core interrupt controller. +- #interrupt-cells: Must be <1> or <2>. + + Value of the first cell specifies the "common" IRQ from peripheral to IDU. + Number N of the particular interrupt line of IDU corresponds to the line N+24 + of the core interrupt controller. + + The (optional) second cell specifies any of the following flags: + - bits[3:0] trigger type and level flags + 1 = low-to-high edge triggered + 2 = NOT SUPPORTED (high-to-low edge triggered) + 4 = active high level-sensitive <<< DEFAULT + 8 = NOT SUPPORTED (active low level-sensitive) + When no second cell is specified, the interrupt is assumed to be level + sensitive. 
The interrupt controller is accessed via the special ARC AUX register interface, hence "reg" property is not specified. -- cgit v1.2.1 From 2f029413cbfbfe519d294c6ac83a0c00e2a48a97 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 12 Aug 2019 14:50:35 -0700 Subject: arc: prefer __section from compiler_attributes.h Reported-by: Sedat Dilek Suggested-by: Josh Poimboeuf Signed-off-by: Nick Desaulniers Signed-off-by: Vineet Gupta --- arch/arc/include/asm/linkage.h | 8 ++++---- arch/arc/include/asm/mach_desc.h | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h index a0eeb9f8f0a9..d9ee43c6b7db 100644 --- a/arch/arc/include/asm/linkage.h +++ b/arch/arc/include/asm/linkage.h @@ -62,15 +62,15 @@ #else /* !__ASSEMBLY__ */ #ifdef CONFIG_ARC_HAS_ICCM -#define __arcfp_code __attribute__((__section__(".text.arcfp"))) +#define __arcfp_code __section(.text.arcfp) #else -#define __arcfp_code __attribute__((__section__(".text"))) +#define __arcfp_code __section(.text) #endif #ifdef CONFIG_ARC_HAS_DCCM -#define __arcfp_data __attribute__((__section__(".data.arcfp"))) +#define __arcfp_data __section(.data.arcfp) #else -#define __arcfp_data __attribute__((__section__(".data"))) +#define __arcfp_data __section(.data) #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h index 8ac0e2ac3e70..73746ed5b834 100644 --- a/arch/arc/include/asm/mach_desc.h +++ b/arch/arc/include/asm/mach_desc.h @@ -53,8 +53,7 @@ extern const struct machine_desc __arch_info_begin[], __arch_info_end[]; */ #define MACHINE_START(_type, _name) \ static const struct machine_desc __mach_desc_##_type \ -__used \ -__attribute__((__section__(".arch.info.init"))) = { \ +__used __section(.arch.info.init) = { \ .name = _name, #define MACHINE_END \ -- cgit v1.2.1 From bae3a8d3308ee69a7dbdf145911b18dfda8ade0d Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Mon, 26 Aug 2019 
06:15:12 -0400 Subject: x86/apic: Do not initialize LDR and DFR for bigsmp Legacy apic init uses bigsmp for smp systems with 8 and more CPUs. The bigsmp APIC implementation uses physical destination mode, but it nevertheless initializes LDR and DFR. The LDR even ends up incorrectly with multiple bits being set. This does not cause a functional problem because LDR and DFR are ignored when physical destination mode is active, but it triggered a problem on a 32-bit KVM guest which jumps into a kdump kernel. The multiple bits set unearthed a bug in the KVM APIC implementation. The code which creates the logical destination map for VCPUs ignores the disabled state of the APIC and ends up overwriting an existing valid entry and as a result, APIC calibration hangs in the guest during kdump initialization. Remove the bogus LDR/DFR initialization. This is not intended to work around the KVM APIC bug. The LDR/DFR initialization is wrong on its own. The issue goes back into the pre git history. The fixes tag is the commit in the bitkeeper import which introduced bigsmp support in 2003.
git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Fixes: db7b9e9f26b8 ("[PATCH] Clustered APIC setup for >8 CPU systems") Suggested-by: Thomas Gleixner Signed-off-by: Bandan Das Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190826101513.5080-2-bsd@redhat.com --- arch/x86/kernel/apic/bigsmp_32.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index afee386ff711..caedd8d60d36 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -38,32 +38,12 @@ static int bigsmp_early_logical_apicid(int cpu) return early_per_cpu(x86_cpu_to_apicid, cpu); } -static inline unsigned long calculate_ldr(int cpu) -{ - unsigned long val, id; - - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - id = per_cpu(x86_bios_cpu_apicid, cpu); - val |= SET_APIC_LOGICAL_ID(id); - - return val; -} - /* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... + * bigsmp enables physical destination mode + * and doesn't use LDR and DFR */ static void bigsmp_init_apic_ldr(void) { - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_FLAT); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); } static void bigsmp_setup_apic_routing(void) -- cgit v1.2.1 From 558682b5291937a70748d36fd9ba757fb25b99ae Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Mon, 26 Aug 2019 06:15:13 -0400 Subject: x86/apic: Include the LDR when clearing out APIC registers Although APIC initialization will typically clear out the LDR before setting it, the APIC cleanup code should reset the LDR. This was discovered with a 32-bit KVM guest jumping into a kdump kernel. 
The stale bits in the LDR triggered a bug in the KVM APIC implementation which caused the destination mapping for VCPUs to be corrupted. Note that this isn't intended to paper over the KVM APIC bug. The kernel has to clear the LDR when resetting the APIC registers except when X2APIC is enabled. This lacks a Fixes tag because missing to clear LDR goes way back into pre git history. [ tglx: Made x2apic_enabled a function call as required ] Signed-off-by: Bandan Das Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190826101513.5080-3-bsd@redhat.com --- arch/x86/kernel/apic/apic.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index aa5495d0f478..dba2828b779a 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1179,6 +1179,10 @@ void clear_local_APIC(void) apic_write(APIC_LVT0, v | APIC_LVT_MASKED); v = apic_read(APIC_LVT1); apic_write(APIC_LVT1, v | APIC_LVT_MASKED); + if (!x2apic_enabled()) { + v = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + apic_write(APIC_LDR, v); + } if (maxlvt >= 4) { v = apic_read(APIC_LVTPC); apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); -- cgit v1.2.1 From bd736ed3e2d1088d9b4050f727342e1e619c3841 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 15 Aug 2019 17:26:17 -0400 Subject: SUNRPC: Don't handle errors if the bind/connect succeeded Don't handle errors in call_bind_status()/call_connect_status() if it turns out that a previous call caused it to succeed. 
Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v5.1+ --- net/sunrpc/clnt.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d8679b6027e9..3b731411d8e8 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1970,6 +1970,7 @@ call_bind(struct rpc_task *task) static void call_bind_status(struct rpc_task *task) { + struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; int status = -EIO; if (rpc_task_transmitted(task)) { @@ -1977,14 +1978,15 @@ call_bind_status(struct rpc_task *task) return; } - if (task->tk_status >= 0) { - dprint_status(task); + dprint_status(task); + trace_rpc_bind_status(task); + if (task->tk_status >= 0) + goto out_next; + if (xprt_bound(xprt)) { task->tk_status = 0; - task->tk_action = call_connect; - return; + goto out_next; } - trace_rpc_bind_status(task); switch (task->tk_status) { case -ENOMEM: dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid); @@ -2043,7 +2045,9 @@ call_bind_status(struct rpc_task *task) rpc_call_rpcerror(task, status); return; - +out_next: + task->tk_action = call_connect; + return; retry_timeout: task->tk_status = 0; task->tk_action = call_bind; @@ -2090,6 +2094,7 @@ call_connect(struct rpc_task *task) static void call_connect_status(struct rpc_task *task) { + struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; struct rpc_clnt *clnt = task->tk_client; int status = task->tk_status; @@ -2099,8 +2104,17 @@ call_connect_status(struct rpc_task *task) } dprint_status(task); - trace_rpc_connect_status(task); + + if (task->tk_status == 0) { + clnt->cl_stats->netreconn++; + goto out_next; + } + if (xprt_connected(xprt)) { + task->tk_status = 0; + goto out_next; + } + task->tk_status = 0; switch (status) { case -ECONNREFUSED: @@ -2131,13 +2145,12 @@ call_connect_status(struct rpc_task *task) case -EAGAIN: case -ETIMEDOUT: goto out_retry; - case 0: - clnt->cl_stats->netreconn++; - task->tk_action = call_transmit; - 
return; } rpc_call_rpcerror(task, status); return; +out_next: + task->tk_action = call_transmit; + return; out_retry: /* Check for timeouts before looping back to call_bind */ task->tk_action = call_bind; -- cgit v1.2.1 From bf2bf9b80e0cd3568ddc85a241abe0dd8b46ebdc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 15 Aug 2019 20:18:48 -0400 Subject: pNFS/flexfiles: Turn off soft RPC calls The pNFS/flexfiles I/O requests are sent with the SOFTCONN flag set, so they automatically time out if the connection breaks. It should therefore not be necessary to have the soft flag set in addition. Fixes: 5f01d9539496 ("nfs41: create NFSv3 DS connection if specified") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs_nfs.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index c0046c348910..82af4809b869 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -627,11 +627,16 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, /* Add this address as an alias */ rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args, rpc_clnt_test_and_add_xprt, NULL); - } else - clp = get_v3_ds_connect(mds_srv, - (struct sockaddr *)&da->da_addr, - da->da_addrlen, IPPROTO_TCP, - timeo, retrans); + continue; + } + clp = get_v3_ds_connect(mds_srv, + (struct sockaddr *)&da->da_addr, + da->da_addrlen, IPPROTO_TCP, + timeo, retrans); + if (IS_ERR(clp)) + continue; + clp->cl_rpcclient->cl_softerr = 0; + clp->cl_rpcclient->cl_softrtry = 0; } if (IS_ERR(clp)) { -- cgit v1.2.1 From 80f455da6cd0998a5be30a8af24ea2a22815c212 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 15 Aug 2019 22:55:19 -0400 Subject: SUNRPC: Handle EADDRINUSE and ENOBUFS correctly If a connect or bind attempt returns EADDRINUSE, that means we want to retry with a different port. It is not a fatal connection error. Similarly, ENOBUFS is not fatal, but just indicates a memory allocation issue. Retry after a short delay. 
Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 3b731411d8e8..8a25440b771c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2005,6 +2005,9 @@ call_bind_status(struct rpc_task *task) task->tk_rebind_retry--; rpc_delay(task, 3*HZ); goto retry_timeout; + case -ENOBUFS: + rpc_delay(task, HZ >> 2); + goto retry_timeout; case -EAGAIN: goto retry_timeout; case -ETIMEDOUT: @@ -2028,7 +2031,6 @@ call_bind_status(struct rpc_task *task) case -ENETDOWN: case -EHOSTUNREACH: case -ENETUNREACH: - case -ENOBUFS: case -EPIPE: dprintk("RPC: %5u remote rpcbind unreachable: %d\n", task->tk_pid, task->tk_status); @@ -2131,8 +2133,6 @@ call_connect_status(struct rpc_task *task) case -ENETDOWN: case -ENETUNREACH: case -EHOSTUNREACH: - case -EADDRINUSE: - case -ENOBUFS: case -EPIPE: xprt_conditional_disconnect(task->tk_rqstp->rq_xprt, task->tk_rqstp->rq_connect_cookie); @@ -2141,10 +2141,14 @@ call_connect_status(struct rpc_task *task) /* retry with existing socket, after a delay */ rpc_delay(task, 3*HZ); /* fall through */ + case -EADDRINUSE: case -ENOTCONN: case -EAGAIN: case -ETIMEDOUT: goto out_retry; + case -ENOBUFS: + rpc_delay(task, HZ >> 2); + goto out_retry; } rpc_call_rpcerror(task, status); return; -- cgit v1.2.1 From d5711920ec6e578f51db95caa6f185f5090b865e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Aug 2019 08:37:26 -0400 Subject: Revert "NFSv4/flexfiles: Abort I/O early if the layout segment was invalidated" This reverts commit a79f194aa4879e9baad118c3f8bb2ca24dbef765. The mechanism for aborting I/O is racy, since we are not guaranteed that the request is asleep while we're changing both task->tk_status and task->tk_action. 
Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v5.1 --- fs/nfs/flexfilelayout/flexfilelayout.c | 17 ----------------- include/linux/sunrpc/sched.h | 1 - net/sunrpc/xprt.c | 7 ------- 3 files changed, 25 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index b04e20d28162..2c7e1eca1ed7 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1148,8 +1148,6 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, break; case -NFS4ERR_RETRY_UNCACHED_REP: break; - case -EAGAIN: - return -NFS4ERR_RESET_TO_PNFS; /* Invalidate Layout errors */ case -NFS4ERR_PNFS_NO_LAYOUT: case -ESTALE: /* mapped NFS4ERR_STALE */ @@ -1210,7 +1208,6 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, case -EBADHANDLE: case -ELOOP: case -ENOSPC: - case -EAGAIN: break; case -EJUKEBOX: nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); @@ -1445,16 +1442,6 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data) ff_layout_read_prepare_common(task, hdr); } -static void -ff_layout_io_prepare_transmit(struct rpc_task *task, - void *data) -{ - struct nfs_pgio_header *hdr = data; - - if (!pnfs_is_valid_lseg(hdr->lseg)) - rpc_exit(task, -EAGAIN); -} - static void ff_layout_read_call_done(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; @@ -1740,7 +1727,6 @@ static void ff_layout_commit_release(void *data) static const struct rpc_call_ops ff_layout_read_call_ops_v3 = { .rpc_call_prepare = ff_layout_read_prepare_v3, - .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit, .rpc_call_done = ff_layout_read_call_done, .rpc_count_stats = ff_layout_read_count_stats, .rpc_release = ff_layout_read_release, @@ -1748,7 +1734,6 @@ static const struct rpc_call_ops ff_layout_read_call_ops_v3 = { static const struct rpc_call_ops ff_layout_read_call_ops_v4 = { .rpc_call_prepare = ff_layout_read_prepare_v4, - 
.rpc_call_prepare_transmit = ff_layout_io_prepare_transmit, .rpc_call_done = ff_layout_read_call_done, .rpc_count_stats = ff_layout_read_count_stats, .rpc_release = ff_layout_read_release, @@ -1756,7 +1741,6 @@ static const struct rpc_call_ops ff_layout_read_call_ops_v4 = { static const struct rpc_call_ops ff_layout_write_call_ops_v3 = { .rpc_call_prepare = ff_layout_write_prepare_v3, - .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit, .rpc_call_done = ff_layout_write_call_done, .rpc_count_stats = ff_layout_write_count_stats, .rpc_release = ff_layout_write_release, @@ -1764,7 +1748,6 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v3 = { static const struct rpc_call_ops ff_layout_write_call_ops_v4 = { .rpc_call_prepare = ff_layout_write_prepare_v4, - .rpc_call_prepare_transmit = ff_layout_io_prepare_transmit, .rpc_call_done = ff_layout_write_call_done, .rpc_count_stats = ff_layout_write_count_stats, .rpc_release = ff_layout_write_release, diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index baa3ecdb882f..27536b961552 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -98,7 +98,6 @@ typedef void (*rpc_action)(struct rpc_task *); struct rpc_call_ops { void (*rpc_call_prepare)(struct rpc_task *, void *); - void (*rpc_call_prepare_transmit)(struct rpc_task *, void *); void (*rpc_call_done)(struct rpc_task *, void *); void (*rpc_count_stats)(struct rpc_task *, void *); void (*rpc_release)(void *); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 783748dc5e6f..2e71f5455c6c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1408,13 +1408,6 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task) status = -EBADMSG; goto out_dequeue; } - if (task->tk_ops->rpc_call_prepare_transmit) { - task->tk_ops->rpc_call_prepare_transmit(task, - task->tk_calldata); - status = task->tk_status; - if (status < 0) - goto out_dequeue; - } if (RPC_SIGNALLED(task)) { status = 
-ERESTARTSYS; goto out_dequeue; -- cgit v1.2.1 From c82e5472c9980e0e483f4b689044150eefaca408 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Aug 2019 08:58:48 -0400 Subject: SUNRPC: Handle connection breakages correctly in call_status() If the connection breaks while we're waiting for a reply from the server, then we want to immediately try to reconnect. Fixes: ec6017d90359 ("SUNRPC fix regression in umount of a secure mount") Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8a25440b771c..a07b516e503a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2382,7 +2382,7 @@ call_status(struct rpc_task *task) case -ECONNABORTED: case -ENOTCONN: rpc_force_rebind(clnt); - /* fall through */ + break; case -EADDRINUSE: rpc_delay(task, 3*HZ); /* fall through */ -- cgit v1.2.1 From 7af46292dadcf8870946916f79fdddf79bd7267f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 23 Aug 2019 08:37:50 -0400 Subject: pNFS/flexfiles: Don't time out requests on hard mounts If the mount is hard, we should ignore the 'io_maxretrans' module parameter so that we always keep retrying. 
Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 2c7e1eca1ed7..5657b7f2611f 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -928,7 +929,9 @@ retry: pgm = &pgio->pg_mirrors[0]; pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; - pgio->pg_maxretrans = io_maxretrans; + if (NFS_SERVER(pgio->pg_inode)->flags & + (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)) + pgio->pg_maxretrans = io_maxretrans; return; out_nolseg: if (pgio->pg_error < 0) @@ -940,6 +943,7 @@ out_mds: pgio->pg_lseg); pnfs_put_lseg(pgio->pg_lseg); pgio->pg_lseg = NULL; + pgio->pg_maxretrans = 0; nfs_pageio_reset_read_mds(pgio); } @@ -1000,7 +1004,9 @@ retry: pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize; } - pgio->pg_maxretrans = io_maxretrans; + if (NFS_SERVER(pgio->pg_inode)->flags & + (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)) + pgio->pg_maxretrans = io_maxretrans; return; out_mds: @@ -1010,6 +1016,7 @@ out_mds: pgio->pg_lseg); pnfs_put_lseg(pgio->pg_lseg); pgio->pg_lseg = NULL; + pgio->pg_maxretrans = 0; nfs_pageio_reset_write_mds(pgio); } -- cgit v1.2.1 From 8f54c7a4babf58bbaf849e126f7ae9664bdc9e04 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 15 Aug 2019 12:26:05 -0400 Subject: NFS: Fix spurious EIO read errors If the client attempts to read a page, but the read fails due to some spurious error (e.g. an ACCESS error or a timeout, ...) then we need to allow other processes to retry. Also try to report errors correctly when doing a synchronous readpage. 
Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 10 ++++++++++ fs/nfs/read.c | 35 ++++++++++++++++++++++++++--------- fs/nfs/write.c | 12 ------------ 3 files changed, 36 insertions(+), 21 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index a2346a2f8361..e64f810223be 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -775,3 +775,13 @@ static inline bool nfs_error_is_fatal(int err) } } +static inline bool nfs_error_is_fatal_on_server(int err) +{ + switch (err) { + case 0: + case -ERESTARTSYS: + case -EINTR: + return false; + } + return nfs_error_is_fatal(err); +} diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c19841c82b6a..cfe0b586eadd 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -91,19 +91,25 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) } EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); -static void nfs_readpage_release(struct nfs_page *req) +static void nfs_readpage_release(struct nfs_page *req, int error) { struct inode *inode = d_inode(nfs_req_openctx(req)->dentry); + struct page *page = req->wb_page; dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode), req->wb_bytes, (long long)req_offset(req)); + if (nfs_error_is_fatal_on_server(error) && error != -ETIMEDOUT) + SetPageError(page); if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) { - if (PageUptodate(req->wb_page)) - nfs_readpage_to_fscache(inode, req->wb_page, 0); + struct address_space *mapping = page_file_mapping(page); - unlock_page(req->wb_page); + if (PageUptodate(page)) + nfs_readpage_to_fscache(inode, page, 0); + else if (!PageError(page) && !PagePrivate(page)) + generic_error_remove_page(mapping, page); + unlock_page(page); } nfs_release_request(req); } @@ -131,7 +137,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, &nfs_async_read_completion_ops); if (!nfs_pageio_add_request(&pgio, new)) { nfs_list_remove_request(new); - nfs_readpage_release(new); + 
nfs_readpage_release(new, pgio.pg_error); } nfs_pageio_complete(&pgio); @@ -153,6 +159,7 @@ static void nfs_page_group_set_uptodate(struct nfs_page *req) static void nfs_read_completion(struct nfs_pgio_header *hdr) { unsigned long bytes = 0; + int error; if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) goto out; @@ -179,14 +186,19 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr) zero_user_segment(page, start, end); } } + error = 0; bytes += req->wb_bytes; if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { if (bytes <= hdr->good_bytes) nfs_page_group_set_uptodate(req); + else { + error = hdr->error; + xchg(&nfs_req_openctx(req)->error, error); + } } else nfs_page_group_set_uptodate(req); nfs_list_remove_request(req); - nfs_readpage_release(req); + nfs_readpage_release(req, error); } out: hdr->release(hdr); @@ -213,7 +225,7 @@ nfs_async_read_error(struct list_head *head, int error) while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); - nfs_readpage_release(req); + nfs_readpage_release(req, error); } } @@ -337,8 +349,13 @@ int nfs_readpage(struct file *file, struct page *page) goto out; } + xchg(&ctx->error, 0); error = nfs_readpage_async(ctx, inode, page); - + if (!error) { + error = wait_on_page_locked_killable(page); + if (!PageUptodate(page) && !error) + error = xchg(&ctx->error, 0); + } out: put_nfs_open_context(ctx); return error; @@ -372,8 +389,8 @@ readpage_async_filler(void *data, struct page *page) zero_user_segment(page, len, PAGE_SIZE); if (!nfs_pageio_add_request(desc->pgio, new)) { nfs_list_remove_request(new); - nfs_readpage_release(new); error = desc->pgio->pg_error; + nfs_readpage_release(new, error); goto out; } return 0; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3399149435ce..cee9905e419c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -599,18 +599,6 @@ static void nfs_write_error(struct nfs_page *req, int error) nfs_release_request(req); } -static bool -nfs_error_is_fatal_on_server(int err) -{ 
- switch (err) { - case 0: - case -ERESTARTSYS: - case -EINTR: - return false; - } - return nfs_error_is_fatal(err); -} - /* * Find an associated nfs write request, and prepare to flush it out * May return an error if the user signalled nfs_wait_on_request(). -- cgit v1.2.1 From 96c4145599b30c0eb6cbeaa24207802452dd1872 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 24 Aug 2019 10:39:00 -0400 Subject: NFS: Fix writepage(s) error handling to not report errors twice If writepage()/writepages() saw an error, but handled it without reporting it, we should not be re-reporting that error on exit. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index cee9905e419c..d193042fa228 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -621,12 +621,12 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags)); /* If there is a fatal error that covers this write, just exit */ - ret = 0; mapping = page_file_mapping(page); - if (test_bit(AS_ENOSPC, &mapping->flags) || - test_bit(AS_EIO, &mapping->flags)) + ret = pgio->pg_error; + if (nfs_error_is_fatal_on_server(ret)) goto out_launder; + ret = 0; if (!nfs_pageio_add_request(pgio, req)) { ret = pgio->pg_error; /* @@ -638,6 +638,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, } else ret = -EAGAIN; nfs_redirty_request(req); + pgio->pg_error = 0; } else nfs_add_stats(page_file_mapping(page)->host, NFSIOS_WRITEPAGES, 1); @@ -657,7 +658,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, ret = nfs_page_async_flush(pgio, page); if (ret == -EAGAIN) { redirty_page_for_writepage(wbc, page); - ret = 0; + ret = AOP_WRITEPAGE_ACTIVATE; } return ret; } @@ -676,10 +677,11 @@ static int nfs_writepage_locked(struct page *page, nfs_pageio_init_write(&pgio, inode, 0, false, 
&nfs_async_write_completion_ops); err = nfs_do_writepage(page, wbc, &pgio); + pgio.pg_error = 0; nfs_pageio_complete(&pgio); if (err < 0) return err; - if (pgio.pg_error < 0) + if (nfs_error_is_fatal(pgio.pg_error)) return pgio.pg_error; return 0; } @@ -689,7 +691,8 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc) int ret; ret = nfs_writepage_locked(page, wbc); - unlock_page(page); + if (ret != AOP_WRITEPAGE_ACTIVATE) + unlock_page(page); return ret; } @@ -698,7 +701,8 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control * int ret; ret = nfs_do_writepage(page, wbc, data); - unlock_page(page); + if (ret != AOP_WRITEPAGE_ACTIVATE) + unlock_page(page); return ret; } @@ -724,13 +728,14 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) &nfs_async_write_completion_ops); pgio.pg_io_completion = ioc; err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); + pgio.pg_error = 0; nfs_pageio_complete(&pgio); nfs_io_completion_put(ioc); if (err < 0) goto out_err; err = pgio.pg_error; - if (err < 0) + if (nfs_error_is_fatal(err)) goto out_err; return 0; out_err: -- cgit v1.2.1 From 42068e1ef961c719f967dbbb4ddcb394a0ba7917 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 16 Aug 2019 14:56:35 +0200 Subject: drm/amdgpu: fix dma_fence_wait without reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to grab a reference to the fence we wait for. 
Signed-off-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index f539a2a92774..7398b4850649 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -534,21 +534,24 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity) { struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity); - unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1); - struct dma_fence *other = centity->fences[idx]; + struct dma_fence *other; + unsigned idx; + long r; - if (other) { - signed long r; - r = dma_fence_wait(other, true); - if (r < 0) { - if (r != -ERESTARTSYS) - DRM_ERROR("Error (%ld) waiting for fence!\n", r); + spin_lock(&ctx->ring_lock); + idx = centity->sequence & (amdgpu_sched_jobs - 1); + other = dma_fence_get(centity->fences[idx]); + spin_unlock(&ctx->ring_lock); - return r; - } - } + if (!other) + return 0; - return 0; + r = dma_fence_wait(other, true); + if (r < 0 && r != -ERESTARTSYS) + DRM_ERROR("Error (%ld) waiting for fence!\n", r); + + dma_fence_put(other); + return r; } void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) -- cgit v1.2.1 From 86c28b2d69f93a218a9a5cef146ed0097a98687f Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 23 Aug 2019 19:00:28 -0700 Subject: nfp: bpf: fix latency bug when updating stack index register NFP is using Local Memory to model stack. LM_addr could be used as base of a 16 32-bit word region of Local Memory. Then, if the stack offset is beyond the current region, the local index needs to be updated. 
The update needs at least three cycles to take effect, therefore the sequence normally looks like: local_csr_wr[ActLMAddr3, gprB_5] nop nop nop If the local index switch happens on a narrow load, then the instruction preparing value to zero high 32-bit of the destination register could be counted as one cycle, the sequence then could be something like: local_csr_wr[ActLMAddr3, gprB_5] nop nop immed[gprB_5, 0] However, we have a zero extension optimization by which the zeroing of the high 32-bit could be eliminated, in which case the above IMMED insn won't be available and the first sequence needs to be generated. Fixes: 0b4de1ff19bf ("nfp: bpf: eliminate zero extension code-gen") Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 4054b70d7719..5afcb3c4c2ef 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1163,7 +1163,7 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool clr_gpr, lmem_step step) { s32 off = nfp_prog->stack_frame_depth + meta->insn.off + ptr_off; - bool first = true, last; + bool first = true, narrow_ld, last; bool needs_inc = false; swreg stack_off_reg; u8 prev_gpr = 255; @@ -1209,13 +1209,22 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, needs_inc = true; } + + narrow_ld = clr_gpr && size < 8; + if (lm3) { + unsigned int nop_cnt; + emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3); - /* For size < 4 one slot will be filled by zeroing of upper. */ - wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3); + /* For size < 4 one slot will be filled by zeroing of upper, + * but be careful, that zeroing could be eliminated by zext + * optimization.
+ */ + nop_cnt = narrow_ld && meta->flags & FLAG_INSN_DO_ZEXT ? 2 : 3; + wrp_nops(nfp_prog, nop_cnt); } - if (clr_gpr && size < 8) + if (narrow_ld) wrp_zext(nfp_prog, meta, gpr); while (size) { -- cgit v1.2.1 From ede7c460b1da5be7b8ef4efe47f1687babf06408 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 22 Aug 2019 00:53:58 +0530 Subject: bpf: handle 32-bit zext during constant blinding Since BPF constant blinding is performed after the verifier pass, the ALU32 instructions inserted for doubleword immediate loads don't have a corresponding zext instruction. This is causing a kernel oops on powerpc and can be reproduced by running 'test_cgroup_storage' with bpf_jit_harden=2. Fix this by emitting BPF_ZEXT during constant blinding if prog->aux->verifier_zext is set. Fixes: a4b1d3c1ddf6cb ("bpf: verifier: insert zero extension according to analysis result") Reported-by: Michael Ellerman Signed-off-by: Naveen N. Rao Reviewed-by: Jiong Wang Signed-off-by: Daniel Borkmann --- kernel/bpf/core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 8191a7db2777..66088a9e9b9e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -890,7 +890,8 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog, static int bpf_jit_blind_insn(const struct bpf_insn *from, const struct bpf_insn *aux, - struct bpf_insn *to_buff) + struct bpf_insn *to_buff, + bool emit_zext) { struct bpf_insn *to = to_buff; u32 imm_rnd = get_random_int(); @@ -1005,6 +1006,8 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. 
*/ *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm); *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + if (emit_zext) + *to++ = BPF_ZEXT_REG(BPF_REG_AX); *to++ = BPF_ALU64_REG(BPF_OR, aux[0].dst_reg, BPF_REG_AX); break; @@ -1088,7 +1091,8 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) insn[1].code == 0) memcpy(aux, insn, sizeof(aux)); - rewritten = bpf_jit_blind_insn(insn, aux, insn_buff); + rewritten = bpf_jit_blind_insn(insn, aux, insn_buff, + clone->aux->verifier_zext); if (!rewritten) continue; -- cgit v1.2.1 From 21649c0b6b7899f4fa3099c46d3d027f60b107ec Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Thu, 22 Aug 2019 08:17:40 -0400 Subject: drm/powerplay: Fix Vega20 Average Power value v4 The SMU changed reading from CurrSocketPower to AverageSocketPower, so reflect this accordingly. This fixes the issue where Average Power Consumption was being reported as 0 from SMU 40.46-onward v2: Fixed headline prefix v3: Add check for SMU version for proper compatibility v4: Style fix Signed-off-by: Kent Russell Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 5 ++++- drivers/gpu/drm/amd/powerplay/vega20_ppt.c | 10 +++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index f27c6fbb192e..e9de1c0a3b87 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -2101,7 +2101,10 @@ static int vega20_get_gpu_power(struct pp_hwmgr *hwmgr, if (ret) return ret; - *query = metrics_table.CurrSocketPower << 8; + if (hwmgr->smu_version < 0x282e00) + *query = metrics_table.CurrSocketPower << 8; + else + *query = metrics_table.AverageSocketPower << 8; return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index dd6fd1c8bf24..29b64ee53c29 
100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -3050,6 +3050,7 @@ static int vega20_get_fan_speed_percent(struct smu_context *smu, static int vega20_get_gpu_power(struct smu_context *smu, uint32_t *value) { + uint32_t smu_version; int ret = 0; SmuMetrics_t metrics; @@ -3060,7 +3061,14 @@ static int vega20_get_gpu_power(struct smu_context *smu, uint32_t *value) if (ret) return ret; - *value = metrics.CurrSocketPower << 8; + ret = smu_get_smc_version(smu, NULL, &smu_version); + if (ret) + return ret; + + if (smu_version < 0x282e00) + *value = metrics.CurrSocketPower << 8; + else + *value = metrics.AverageSocketPower << 8; return 0; } -- cgit v1.2.1 From 23b7f6c41d4717b1638eca47e09d7e99fc7b9fd9 Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Fri, 23 Aug 2019 09:13:18 -0400 Subject: drm/powerplay: Fix Vega20 power reading again For the 40.46 SMU release, they changed CurrSocketPower to AverageSocketPower, but this was changed back in 40.47 so just check if it's 40.46 and make the appropriate change Tested with 40.45, 40.46 and 40.47 successfully Signed-off-by: Kent Russell Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 7 ++++--- drivers/gpu/drm/amd/powerplay/vega20_ppt.c | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index e9de1c0a3b87..81658dc8fafc 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -2101,10 +2101,11 @@ static int vega20_get_gpu_power(struct pp_hwmgr *hwmgr, if (ret) return ret; - if (hwmgr->smu_version < 0x282e00) - *query = metrics_table.CurrSocketPower << 8; - else + /* For the 40.46 release, they changed the value name */ + if (hwmgr->smu_version == 0x282e00) *query = metrics_table.AverageSocketPower << 8; + else + *query = 
metrics_table.CurrSocketPower << 8; return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index 29b64ee53c29..6a14497257e4 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -3065,10 +3065,11 @@ static int vega20_get_gpu_power(struct smu_context *smu, uint32_t *value) if (ret) return ret; - if (smu_version < 0x282e00) - *value = metrics.CurrSocketPower << 8; - else + /* For the 40.46 release, they changed the value name */ + if (smu_version == 0x282e00) *value = metrics.AverageSocketPower << 8; + else + *value = metrics.CurrSocketPower << 8; return 0; } -- cgit v1.2.1 From ddfd151f3def9258397fcde7a372205a2d661903 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 26 Aug 2019 14:55:20 +1000 Subject: KVM: PPC: Book3S: Fix incorrect guest-to-user-translation error handling H_PUT_TCE_INDIRECT handlers receive a page with up to 512 TCEs from a guest. Although we verify correctness of TCEs before we do anything with the existing tables, there is a small window when a check in kvmppc_tce_validate might pass and right after that the guest alters the page of TCEs, causing an early exit from the handler and leaving srcu_read_lock(&vcpu->kvm->srcu) (virtual mode) or lock_rmap(rmap) (real mode) locked. This fixes the bug by jumping to the common exit code with an appropriate unlock. 
Cc: stable@vger.kernel.org # v4.11+ Fixes: 121f80ba68f1 ("KVM: PPC: VFIO: Add in-kernel acceleration for VFIO") Signed-off-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_vio.c | 6 ++++-- arch/powerpc/kvm/book3s_64_vio_hv.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index e99a14798ab0..c4b606fe73eb 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -660,8 +660,10 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, } tce = be64_to_cpu(tce); - if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) - return H_PARAMETER; + if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) { + ret = H_PARAMETER; + goto unlock_exit; + } list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index f50bbeedfc66..b4f20f13b860 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -556,8 +556,10 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); ua = 0; - if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) - return H_PARAMETER; + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { + ret = H_PARAMETER; + goto unlock_exit; + } list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt, -- cgit v1.2.1 From 5752bc4373b21c3fb1dd6db4dcdd569fae391a1d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 8 Jul 2019 14:53:02 +0200 Subject: mfd: rk808: Mark pm functions __maybe_unused The newly added suspend/resume functions are only used if CONFIG_PM is enabled: drivers/mfd/rk808.c:752:12: error: 'rk8xx_resume' defined but not used [-Werror=unused-function] drivers/mfd/rk808.c:732:12: error: 'rk8xx_suspend' defined but not used [-Werror=unused-function] Mark 
them as __maybe_unused so the compiler can silently drop them when they are not needed. Fixes: 586c1b4125b3 ("mfd: rk808: Add RK817 and RK809 support") Signed-off-by: Arnd Bergmann Signed-off-by: Lee Jones --- drivers/mfd/rk808.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/rk808.c b/drivers/mfd/rk808.c index 601cefb5c9d8..9a9e6315ba46 100644 --- a/drivers/mfd/rk808.c +++ b/drivers/mfd/rk808.c @@ -729,7 +729,7 @@ static int rk808_remove(struct i2c_client *client) return 0; } -static int rk8xx_suspend(struct device *dev) +static int __maybe_unused rk8xx_suspend(struct device *dev) { struct rk808 *rk808 = i2c_get_clientdata(rk808_i2c_client); int ret = 0; @@ -749,7 +749,7 @@ static int rk8xx_suspend(struct device *dev) return ret; } -static int rk8xx_resume(struct device *dev) +static int __maybe_unused rk8xx_resume(struct device *dev) { struct rk808 *rk808 = i2c_get_clientdata(rk808_i2c_client); int ret = 0; -- cgit v1.2.1 From ed5fa90660d63bcec4c3a62b03fed9427418b53d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 20 Aug 2019 19:16:57 +0300 Subject: drm/i915: Do not create a new max_bpc prop for MST connectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're not allowed to create new properties after device registration so for MST connectors we need to either create the max_bpc property earlier, or we reuse one we already have. Let's do the latter approach since the corresponding SST connector already has the prop and its min/max are correct also for the MST connector. The problem was highlighted by commit 4f5368b5541a ("drm/kms: Catch mode_object lifetime errors") which results in the following spew: [ 1330.878941] WARNING: CPU: 2 PID: 1554 at drivers/gpu/drm/drm_mode_object.c:45 __drm_mode_object_add+0xa0/0xb0 [drm] ... 
[ 1330.879008] Call Trace: [ 1330.879023] drm_property_create+0xba/0x180 [drm] [ 1330.879036] drm_property_create_range+0x15/0x30 [drm] [ 1330.879048] drm_connector_attach_max_bpc_property+0x62/0x80 [drm] [ 1330.879086] intel_dp_add_mst_connector+0x11f/0x140 [i915] [ 1330.879094] drm_dp_add_port.isra.20+0x20b/0x440 [drm_kms_helper] ... Cc: stable@vger.kernel.org Cc: Lyude Paul Cc: sunpeng.li@amd.com Cc: Daniel Vetter Cc: Sean Paul Fixes: 5ca0ef8a56b8 ("drm/i915: Add max_bpc property for DP MST") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190820161657.9658-1-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza Reviewed-by: Lyude Paul (cherry picked from commit 1b9bd09630d4db4827cc04d358a41a16a6bc2cb0) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 60652ebbdf61..18e4cba76720 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -539,7 +539,15 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo intel_attach_force_audio_property(connector); intel_attach_broadcast_rgb_property(connector); - drm_connector_attach_max_bpc_property(connector, 6, 12); + + /* + * Reuse the prop from the SST connector because we're + * not allowed to create new props after device registration. 
+ */ + connector->max_bpc_property = + intel_dp->attached_connector->base.max_bpc_property; + if (connector->max_bpc_property) + drm_connector_attach_max_bpc_property(connector, 6, 12); return connector; -- cgit v1.2.1 From 0a3dfbb5cd9033752639ef33e319c2f2863c713a Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Tue, 20 Aug 2019 13:46:17 +0800 Subject: drm/i915: Don't deballoon unused ggtt drm_mm_node in linux guest The following call trace may exist in linux guest dmesg when guest i915 driver is unloaded. [ 90.776610] [drm:vgt_deballoon_space.isra.0 [i915]] deballoon space: range [0x0 - 0x0] 0 KiB. [ 90.776621] BUG: unable to handle kernel NULL pointer dereference at 00000000000000c0 [ 90.776691] IP: drm_mm_remove_node+0x4d/0x320 [drm] [ 90.776718] PGD 800000012c7d0067 P4D 800000012c7d0067 PUD 138e4c067 PMD 0 [ 90.777091] task: ffff9adab60f2f00 task.stack: ffffaf39c0fe0000 [ 90.777142] RIP: 0010:drm_mm_remove_node+0x4d/0x320 [drm] [ 90.777573] Call Trace: [ 90.777653] intel_vgt_deballoon+0x4c/0x60 [i915] [ 90.777729] i915_ggtt_cleanup_hw+0x121/0x190 [i915] [ 90.777792] i915_driver_unload+0x145/0x180 [i915] [ 90.777856] i915_pci_remove+0x15/0x20 [i915] [ 90.777890] pci_device_remove+0x3b/0xc0 [ 90.777916] device_release_driver_internal+0x157/0x220 [ 90.777945] driver_detach+0x39/0x70 [ 90.777967] bus_remove_driver+0x51/0xd0 [ 90.777990] pci_unregister_driver+0x23/0x90 [ 90.778019] SyS_delete_module+0x1da/0x240 [ 90.778045] entry_SYSCALL_64_fastpath+0x24/0x87 [ 90.778072] RIP: 0033:0x7f34312af067 [ 90.778092] RSP: 002b:00007ffdea3da0d8 EFLAGS: 00000206 [ 90.778297] RIP: drm_mm_remove_node+0x4d/0x320 [drm] RSP: ffffaf39c0fe3dc0 [ 90.778344] ---[ end trace f4b1bc8305fc59dd ]--- Four drm_mm_node are used to reserve guest ggtt space, but some of them may be skipped and not initialised due to space constraints in intel_vgt_balloon(). If drm_mm_remove_node() is called with uninitialized drm_mm_node, the above call trace occurs. 
This patch checks drm_mm_node's validity before calling drm_mm_remove_node(). Fixes: ff8f797557c7("drm/i915: return the correct usable aperture size under gvt environment") Cc: stable@vger.kernel.org Signed-off-by: Xiong Zhang Acked-by: Zhenyu Wang Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1566279978-9659-1-git-send-email-xiong.y.zhang@intel.com (cherry picked from commit 4776f3529d6b1e47f02904ad1d264d25ea22b27b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_vgpu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 94d3992b599d..724627afdedc 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -101,6 +101,9 @@ static struct _balloon_info_ bl_info; static void vgt_deballoon_space(struct i915_ggtt *ggtt, struct drm_mm_node *node) { + if (!drm_mm_node_allocated(node)) + return; + DRM_DEBUG_DRIVER("deballoon space: range [0x%llx - 0x%llx] %llu KiB.\n", node->start, node->start + node->size, -- cgit v1.2.1 From 5b6eefd667847ca6bd6925f7bd1afcecc457c889 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Wed, 21 Aug 2019 14:59:50 -0700 Subject: drm/i915/dp: Fix DSC enable code to use cpu_transcoder instead of encoder->type This patch fixes the intel_configure_pps_for_dsc_encoder() function to use cpu_transcoder instead of encoder->type to select the correct DSC registers that was wrongly used in the original patch for one DSC register instance.
Fixes: 7182414e2530 ("drm/i915/dp: Configure i915 Picture parameter Set registers during DSC enabling") Cc: Ville Syrjala Cc: Maarten Lankhorst Cc: Jani Nikula Cc: # v5.0+ Signed-off-by: Manasi Navare Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20190821215950.24223-1-manasi.d.navare@intel.com (cherry picked from commit d4c61c4a16decd8ace8660f22c81609a539fccba) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_vdsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index ffec807b8960..f413904a3e96 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -541,7 +541,7 @@ static void intel_configure_pps_for_dsc_encoder(struct intel_encoder *encoder, pps_val |= DSC_PIC_HEIGHT(vdsc_cfg->pic_height) | DSC_PIC_WIDTH(vdsc_cfg->pic_width / num_vdsc_instances); DRM_INFO("PPS2 = 0x%08x\n", pps_val); - if (encoder->type == INTEL_OUTPUT_EDP) { + if (cpu_transcoder == TRANSCODER_EDP) { I915_WRITE(DSCA_PICTURE_PARAMETER_SET_2, pps_val); /* * If 2 VDSC instances are needed, configure PPS for second -- cgit v1.2.1 From 32f0a982650b123bdab36865617d3e03ebcacf3b Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Fri, 23 Aug 2019 16:52:51 -0400 Subject: drm/i915: Call dma_set_max_seg_size() in i915_driver_hw_probe() Currently, we don't call dma_set_max_seg_size() for i915 because we intentionally do not limit the segment length that the device supports. 
However, this results in a warning being emitted if we try to map anything larger than SZ_64K on a kernel with CONFIG_DMA_API_DEBUG_SG enabled: [ 7.751926] DMA-API: i915 0000:00:02.0: mapping sg segment longer than device claims to support [len=98304] [max=65536] [ 7.751934] WARNING: CPU: 5 PID: 474 at kernel/dma/debug.c:1220 debug_dma_map_sg+0x20f/0x340 This was originally brought up on https://bugs.freedesktop.org/show_bug.cgi?id=108517 , and the consensus there was it wasn't really useful to set a limit (and that dma-debug isn't really all that useful for i915 in the first place). Unfortunately though, CONFIG_DMA_API_DEBUG_SG is enabled in the debug configs for various distro kernels. Since a WARN_ON() will disable automatic problem reporting (and cause any CI with said option enabled to start complaining), we really should just fix the problem. Note that as Chris Wilson and I discussed, the other solution for this would be to make DMA-API not make such assumptions when a driver hasn't explicitly set a maximum segment size. But, taking a look at the commit which originally introduced this behavior, commit 78c47830a5cb ("dma-debug: check scatterlist segments"), there is an explicit mention of this assumption and how it applies to devices with no segment size: Conversely, devices which are less limited than the rather conservative defaults, or indeed have no limitations at all (e.g. GPUs with their own internal MMU), should be encouraged to set appropriate dma_parms, as they may get more efficient DMA mapping performance out of it. So unless there are any concerns (I'm open to discussion!), let's just follow suit and call dma_set_max_seg_size() with UINT_MAX as our limit to silence any warnings. Changes since v3: * Drop patch for enabling CONFIG_DMA_API_DEBUG_SG in CI.
It looks like just turning it on causes the kernel to spit out bogus WARN_ONs() during some igt tests which would otherwise require teaching igt to disable the various DMA-API debugging options causing this. This is too much work to be worth it, since DMA-API debugging is useless for us. So, we'll just settle with this single patch to squelch WARN_ONs() during driver load for users that have CONFIG_DMA_API_DEBUG_SG turned on for some reason. * Move dma_set_max_seg_size() call into i915_driver_hw_probe() - Chris Wilson Signed-off-by: Lyude Paul Reviewed-by: Chris Wilson Cc: # v4.18+ Link: https://patchwork.freedesktop.org/patch/msgid/20190823205251.14298-1-lyude@redhat.com (cherry picked from commit acd674af95d3f627062007429b9c195c6b32361d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f62e3397d936..bac1ee94f63f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1598,6 +1598,12 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) pci_set_master(pdev); + /* + * We don't have a max segment size, so set it to the max so sg's + * debugging layer doesn't complain + */ + dma_set_max_seg_size(&pdev->dev, UINT_MAX); + /* overlay on gen2 is broken and can't address above 1G */ if (IS_GEN(dev_priv, 2)) { ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(30)); -- cgit v1.2.1 From c96e8483cb2da6695c8b8d0896fe7ae272a07b54 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 26 Aug 2019 16:26:01 +0300 Subject: x86/boot/compressed/64: Fix missing initialization in find_trampoline_placement() Gustavo noticed that 'new' can be left uninitialized if 'bios_start' happens to be less than or equal to 'entry->addr + entry->size'. Initialize the variable at the beginning of the iteration to the current value of 'bios_start'.
Fixes: 0a46fff2f910 ("x86/boot/compressed/64: Fix boot on machines with broken E820 table") Reported-by: "Gustavo A. R. Silva" Signed-off-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20190826133326.7cxb4vbmiawffv2r@box --- arch/x86/boot/compressed/pgtable_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 2faddeb0398a..c8862696a47b 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -72,7 +72,7 @@ static unsigned long find_trampoline_placement(void) /* Find the first usable memory region under bios_start. */ for (i = boot_params->e820_entries - 1; i >= 0; i--) { - unsigned long new; + unsigned long new = bios_start; entry = &boot_params->e820_table[i]; -- cgit v1.2.1 From c3c9e3df49f8d83db09d1f61c8bed54e7fed8662 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 Aug 2019 09:25:37 +0100 Subject: rxrpc: Improve jumbo packet counting Improve the information stored about jumbo packets so that we don't need to reparse them so much later. 
Signed-off-by: David Howells Reviewed-by: Jeffrey Altman --- net/rxrpc/ar-internal.h | 10 +++++++--- net/rxrpc/input.c | 23 ++++++++++++++--------- net/rxrpc/protocol.h | 9 +++++++++ 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 145335611af6..87cff6c218b6 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -185,11 +185,15 @@ struct rxrpc_host_header { * - max 48 bytes (struct sk_buff::cb) */ struct rxrpc_skb_priv { - union { - u8 nr_jumbo; /* Number of jumbo subpackets */ - }; + u8 nr_subpackets; /* Number of subpackets */ + u8 rx_flags; /* Received packet flags */ +#define RXRPC_SKB_INCL_LAST 0x01 /* - Includes last packet */ union { int remain; /* amount of space remaining for next write */ + + /* List of requested ACKs on subpackets */ + unsigned long rx_req_ack[(RXRPC_MAX_NR_JUMBO + BITS_PER_LONG - 1) / + BITS_PER_LONG]; }; struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index dd47d465d1d3..ffcec5117954 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -347,7 +347,7 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) } /* - * Scan a jumbo packet to validate its structure and to work out how many + * Scan a data packet to validate its structure and to work out how many * subpackets it contains. * * A jumbo packet is a collection of consecutive packets glued together with @@ -358,16 +358,21 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) * the last are RXRPC_JUMBO_DATALEN in size. The last subpacket may be of any * size. 
*/ -static bool rxrpc_validate_jumbo(struct sk_buff *skb) +static bool rxrpc_validate_data(struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int len = skb->len; - int nr_jumbo = 1; u8 flags = sp->hdr.flags; - do { - nr_jumbo++; + for (;;) { + if (flags & RXRPC_REQUEST_ACK) + __set_bit(sp->nr_subpackets, sp->rx_req_ack); + sp->nr_subpackets++; + + if (!(flags & RXRPC_JUMBO_PACKET)) + break; + if (len - offset < RXRPC_JUMBO_SUBPKTLEN) goto protocol_error; if (flags & RXRPC_LAST_PACKET) @@ -376,9 +381,10 @@ static bool rxrpc_validate_jumbo(struct sk_buff *skb) if (skb_copy_bits(skb, offset, &flags, 1) < 0) goto protocol_error; offset += sizeof(struct rxrpc_jumbo_header); - } while (flags & RXRPC_JUMBO_PACKET); + } - sp->nr_jumbo = nr_jumbo; + if (flags & RXRPC_LAST_PACKET) + sp->rx_flags |= RXRPC_SKB_INCL_LAST; return true; protocol_error: @@ -1237,8 +1243,7 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) if (sp->hdr.callNumber == 0 || sp->hdr.seq == 0) goto bad_message; - if (sp->hdr.flags & RXRPC_JUMBO_PACKET && - !rxrpc_validate_jumbo(skb)) + if (!rxrpc_validate_data(skb)) goto bad_message; break; diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h index 99ce322d7caa..49bb972539aa 100644 --- a/net/rxrpc/protocol.h +++ b/net/rxrpc/protocol.h @@ -89,6 +89,15 @@ struct rxrpc_jumbo_header { #define RXRPC_JUMBO_DATALEN 1412 /* non-terminal jumbo packet data length */ #define RXRPC_JUMBO_SUBPKTLEN (RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header)) +/* + * The maximum number of subpackets that can possibly fit in a UDP packet is: + * + * ((max_IP - IP_hdr - UDP_hdr) / RXRPC_JUMBO_SUBPKTLEN) + 1 + * = ((65535 - 28 - 28) / 1416) + 1 + * = 46 non-terminal packets and 1 terminal packet. 
+ */ +#define RXRPC_MAX_NR_JUMBO 47 + /*****************************************************************************/ /* * on-the-wire Rx ACK packet data payload -- cgit v1.2.1 From e2de6c4048989007b353164b19d6b7d5be4fa9e3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 27 Aug 2019 09:51:30 +0100 Subject: rxrpc: Use info in skbuff instead of reparsing a jumbo packet Use the information now cached in the skbuff private data to avoid the need to reparse a jumbo packet. We can find all the subpackets by dead reckoning, so it's only necessary to note how many there are, whether the last one is flagged as LAST_PACKET and whether any have the REQUEST_ACK flag set. This is necessary as once recvmsg() can see the packet, it can start modifying it, such as doing in-place decryption. Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 3 +- net/rxrpc/input.c | 231 ++++++++++++++++++++++++------------------------ net/rxrpc/recvmsg.c | 41 +++++---- 3 files changed, 139 insertions(+), 136 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 87cff6c218b6..20d7907a5bc6 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -617,8 +617,7 @@ struct rxrpc_call { #define RXRPC_TX_ANNO_LAST 0x04 #define RXRPC_TX_ANNO_RESENT 0x08 -#define RXRPC_RX_ANNO_JUMBO 0x3f /* Jumbo subpacket number + 1 if not zero */ -#define RXRPC_RX_ANNO_JLAST 0x40 /* Set if last element of a jumbo packet */ +#define RXRPC_RX_ANNO_SUBPACKET 0x3f /* Subpacket number in jumbogram */ #define RXRPC_RX_ANNO_VERIFIED 0x80 /* Set if verified and decrypted */ rxrpc_seq_t tx_hard_ack; /* Dead slot in buffer; the first transmitted but * not hard-ACK'd packet follows this. 
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index ffcec5117954..35b1a9368d80 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -405,10 +405,10 @@ protocol_error: * (that information is encoded in the ACK packet). */ static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq, - u8 annotation, bool *_jumbo_bad) + bool is_jumbo, bool *_jumbo_bad) { /* Discard normal packets that are duplicates. */ - if (annotation == 0) + if (is_jumbo) return; /* Skip jumbo subpackets that are duplicates. When we've had three or @@ -428,19 +428,17 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); enum rxrpc_call_state state; - unsigned int offset = sizeof(struct rxrpc_wire_header); - unsigned int ix; + unsigned int j; rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0; - rxrpc_seq_t seq = sp->hdr.seq, hard_ack; - bool immediate_ack = false, jumbo_bad = false, queued; - u16 len; - u8 ack = 0, flags, annotation = 0; + rxrpc_seq_t seq0 = sp->hdr.seq, hard_ack; + bool immediate_ack = false, jumbo_bad = false; + u8 ack = 0; _enter("{%u,%u},{%u,%u}", - call->rx_hard_ack, call->rx_top, skb->len, seq); + call->rx_hard_ack, call->rx_top, skb->len, seq0); - _proto("Rx DATA %%%u { #%u f=%02x }", - sp->hdr.serial, seq, sp->hdr.flags); + _proto("Rx DATA %%%u { #%u f=%02x n=%u }", + sp->hdr.serial, seq0, sp->hdr.flags, sp->nr_subpackets); state = READ_ONCE(call->state); if (state >= RXRPC_CALL_COMPLETE) @@ -469,137 +467,136 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) !rxrpc_receiving_reply(call)) goto unlock; - call->ackr_prev_seq = seq; - + call->ackr_prev_seq = seq0; hard_ack = READ_ONCE(call->rx_hard_ack); - if (after(seq, hard_ack + call->rx_winsize)) { - ack = RXRPC_ACK_EXCEEDS_WINDOW; - ack_serial = serial; - goto ack; - } - flags = sp->hdr.flags; - if (flags & RXRPC_JUMBO_PACKET) { + if (sp->nr_subpackets > 1) { if (call->nr_jumbo_bad > 3) { ack = 
RXRPC_ACK_NOSPACE; ack_serial = serial; goto ack; } - annotation = 1; } -next_subpacket: - queued = false; - ix = seq & RXRPC_RXTX_BUFF_MASK; - len = skb->len; - if (flags & RXRPC_JUMBO_PACKET) - len = RXRPC_JUMBO_DATALEN; - - if (flags & RXRPC_LAST_PACKET) { - if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && - seq != call->rx_top) { - rxrpc_proto_abort("LSN", call, seq); - goto unlock; - } - } else { - if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && - after_eq(seq, call->rx_top)) { - rxrpc_proto_abort("LSA", call, seq); - goto unlock; + for (j = 0; j < sp->nr_subpackets; j++) { + rxrpc_serial_t serial = sp->hdr.serial + j; + rxrpc_seq_t seq = seq0 + j; + unsigned int ix = seq & RXRPC_RXTX_BUFF_MASK; + bool terminal = (j == sp->nr_subpackets - 1); + bool last = terminal && (sp->rx_flags & RXRPC_SKB_INCL_LAST); + u8 flags, annotation = j; + + _proto("Rx DATA+%u %%%u { #%x t=%u l=%u }", + j, serial, seq, terminal, last); + + if (last) { + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && + seq != call->rx_top) { + rxrpc_proto_abort("LSN", call, seq); + goto unlock; + } + } else { + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && + after_eq(seq, call->rx_top)) { + rxrpc_proto_abort("LSA", call, seq); + goto unlock; + } } - } - - trace_rxrpc_rx_data(call->debug_id, seq, serial, flags, annotation); - if (before_eq(seq, hard_ack)) { - ack = RXRPC_ACK_DUPLICATE; - ack_serial = serial; - goto skip; - } - if (flags & RXRPC_REQUEST_ACK && !ack) { - ack = RXRPC_ACK_REQUESTED; - ack_serial = serial; - } + flags = 0; + if (last) + flags |= RXRPC_LAST_PACKET; + if (!terminal) + flags |= RXRPC_JUMBO_PACKET; + if (test_bit(j, sp->rx_req_ack)) + flags |= RXRPC_REQUEST_ACK; + trace_rxrpc_rx_data(call->debug_id, seq, serial, flags, annotation); - if (call->rxtx_buffer[ix]) { - rxrpc_input_dup_data(call, seq, annotation, &jumbo_bad); - if (ack != RXRPC_ACK_DUPLICATE) { + if (before_eq(seq, hard_ack)) { ack = RXRPC_ACK_DUPLICATE; ack_serial = serial; + continue; } - immediate_ack 
= true; - goto skip; - } - - /* Queue the packet. We use a couple of memory barriers here as need - * to make sure that rx_top is perceived to be set after the buffer - * pointer and that the buffer pointer is set after the annotation and - * the skb data. - * - * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window() - * and also rxrpc_fill_out_ack(). - */ - rxrpc_get_skb(skb, rxrpc_skb_rx_got); - call->rxtx_annotations[ix] = annotation; - smp_wmb(); - call->rxtx_buffer[ix] = skb; - if (after(seq, call->rx_top)) { - smp_store_release(&call->rx_top, seq); - } else if (before(seq, call->rx_top)) { - /* Send an immediate ACK if we fill in a hole */ - if (!ack) { - ack = RXRPC_ACK_DELAY; - ack_serial = serial; - } - immediate_ack = true; - } - if (flags & RXRPC_LAST_PACKET) { - set_bit(RXRPC_CALL_RX_LAST, &call->flags); - trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq); - } else { - trace_rxrpc_receive(call, rxrpc_receive_queue, serial, seq); - } - queued = true; - if (after_eq(seq, call->rx_expect_next)) { - if (after(seq, call->rx_expect_next)) { - _net("OOS %u > %u", seq, call->rx_expect_next); - ack = RXRPC_ACK_OUT_OF_SEQUENCE; - ack_serial = serial; + if (call->rxtx_buffer[ix]) { + rxrpc_input_dup_data(call, seq, sp->nr_subpackets > 1, + &jumbo_bad); + if (ack != RXRPC_ACK_DUPLICATE) { + ack = RXRPC_ACK_DUPLICATE; + ack_serial = serial; + } + immediate_ack = true; + continue; } - call->rx_expect_next = seq + 1; - } -skip: - offset += len; - if (flags & RXRPC_JUMBO_PACKET) { - if (skb_copy_bits(skb, offset, &flags, 1) < 0) { - rxrpc_proto_abort("XJF", call, seq); - goto unlock; - } - offset += sizeof(struct rxrpc_jumbo_header); - seq++; - serial++; - annotation++; - if (flags & RXRPC_JUMBO_PACKET) - annotation |= RXRPC_RX_ANNO_JLAST; if (after(seq, hard_ack + call->rx_winsize)) { ack = RXRPC_ACK_EXCEEDS_WINDOW; ack_serial = serial; - if (!jumbo_bad) { - call->nr_jumbo_bad++; - jumbo_bad = true; + if (flags & RXRPC_JUMBO_PACKET) { + if 
(!jumbo_bad) { + call->nr_jumbo_bad++; + jumbo_bad = true; + } } + goto ack; } - _proto("Rx DATA Jumbo %%%u", serial); - goto next_subpacket; - } + if (flags & RXRPC_REQUEST_ACK && !ack) { + ack = RXRPC_ACK_REQUESTED; + ack_serial = serial; + } + + /* Queue the packet. We use a couple of memory barriers here as need + * to make sure that rx_top is perceived to be set after the buffer + * pointer and that the buffer pointer is set after the annotation and + * the skb data. + * + * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window() + * and also rxrpc_fill_out_ack(). + */ + rxrpc_get_skb(skb, rxrpc_skb_rx_got); + call->rxtx_annotations[ix] = annotation; + smp_wmb(); + call->rxtx_buffer[ix] = skb; + if (after(seq, call->rx_top)) { + smp_store_release(&call->rx_top, seq); + } else if (before(seq, call->rx_top)) { + /* Send an immediate ACK if we fill in a hole */ + if (!ack) { + ack = RXRPC_ACK_DELAY; + ack_serial = serial; + } + immediate_ack = true; + } + + if (terminal) { + /* From this point on, we're not allowed to touch the + * packet any longer as its ref now belongs to the Rx + * ring. 
+ */ + skb = NULL; + } - if (queued && flags & RXRPC_LAST_PACKET && !ack) { - ack = RXRPC_ACK_DELAY; - ack_serial = serial; + if (last) { + set_bit(RXRPC_CALL_RX_LAST, &call->flags); + if (!ack) { + ack = RXRPC_ACK_DELAY; + ack_serial = serial; + } + trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq); + } else { + trace_rxrpc_receive(call, rxrpc_receive_queue, serial, seq); + } + + if (after_eq(seq, call->rx_expect_next)) { + if (after(seq, call->rx_expect_next)) { + _net("OOS %u > %u", seq, call->rx_expect_next); + ack = RXRPC_ACK_OUT_OF_SEQUENCE; + ack_serial = serial; + } + call->rx_expect_next = seq + 1; + } } ack: @@ -612,7 +609,7 @@ ack: false, true, rxrpc_propose_ack_input_data); - if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1) { + if (seq0 == READ_ONCE(call->rx_hard_ack) + 1) { trace_rxrpc_notify_socket(call->debug_id, serial); rxrpc_notify_socket(call); } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 9a7e1bc9791d..e49eacfaf4d6 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -177,7 +177,8 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) struct sk_buff *skb; rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top; - u8 flags; + bool last = false; + u8 subpacket; int ix; _enter("%d", call->debug_id); @@ -191,10 +192,13 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) skb = call->rxtx_buffer[ix]; rxrpc_see_skb(skb, rxrpc_skb_rx_rotated); sp = rxrpc_skb(skb); - flags = sp->hdr.flags; - serial = sp->hdr.serial; - if (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO) - serial += (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO) - 1; + + subpacket = call->rxtx_annotations[ix] & RXRPC_RX_ANNO_SUBPACKET; + serial = sp->hdr.serial + subpacket; + + if (subpacket == sp->nr_subpackets - 1 && + sp->rx_flags & RXRPC_SKB_INCL_LAST) + last = true; call->rxtx_buffer[ix] = NULL; call->rxtx_annotations[ix] = 0; @@ -203,9 +207,8 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) 
rxrpc_free_skb(skb, rxrpc_skb_rx_freed); - _debug("%u,%u,%02x", hard_ack, top, flags); trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); - if (flags & RXRPC_LAST_PACKET) { + if (last) { rxrpc_end_rx_phase(call, serial); } else { /* Check to see if there's an ACK that needs sending. */ @@ -233,18 +236,19 @@ static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); rxrpc_seq_t seq = sp->hdr.seq; u16 cksum = sp->hdr.cksum; + u8 subpacket = annotation & RXRPC_RX_ANNO_SUBPACKET; _enter(""); /* For all but the head jumbo subpacket, the security checksum is in a * jumbo header immediately prior to the data. */ - if ((annotation & RXRPC_RX_ANNO_JUMBO) > 1) { + if (subpacket > 0) { __be16 tmp; if (skb_copy_bits(skb, offset - 2, &tmp, 2) < 0) BUG(); cksum = ntohs(tmp); - seq += (annotation & RXRPC_RX_ANNO_JUMBO) - 1; + seq += subpacket; } return call->conn->security->verify_packet(call, skb, offset, len, @@ -265,19 +269,18 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, u8 *_annotation, unsigned int *_offset, unsigned int *_len) { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int len; int ret; u8 annotation = *_annotation; + u8 subpacket = annotation & RXRPC_RX_ANNO_SUBPACKET; /* Locate the subpacket */ + offset += subpacket * RXRPC_JUMBO_SUBPKTLEN; len = skb->len - offset; - if ((annotation & RXRPC_RX_ANNO_JUMBO) > 0) { - offset += (((annotation & RXRPC_RX_ANNO_JUMBO) - 1) * - RXRPC_JUMBO_SUBPKTLEN); - len = (annotation & RXRPC_RX_ANNO_JLAST) ? 
- skb->len - offset : RXRPC_JUMBO_SUBPKTLEN; - } + if (subpacket < sp->nr_subpackets - 1) + len = RXRPC_JUMBO_DATALEN; if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) { ret = rxrpc_verify_packet(call, skb, annotation, offset, len); @@ -303,6 +306,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, { struct rxrpc_skb_priv *sp; struct sk_buff *skb; + rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top, seq; size_t remain; bool last; @@ -339,9 +343,12 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, rxrpc_see_skb(skb, rxrpc_skb_rx_seen); sp = rxrpc_skb(skb); - if (!(flags & MSG_PEEK)) + if (!(flags & MSG_PEEK)) { + serial = sp->hdr.serial; + serial += call->rxtx_annotations[ix] & RXRPC_RX_ANNO_SUBPACKET; trace_rxrpc_receive(call, rxrpc_receive_front, - sp->hdr.serial, seq); + serial, seq); + } if (msg) sock_recv_timestamp(msg, sock->sk, skb); -- cgit v1.2.1 From 4858e40303fba12e7506bf4354f20fdf550b3cd5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 Aug 2019 09:25:36 +0100 Subject: rxrpc: Pass the input handler's data skb reference to the Rx ring Pass the reference held on a DATA skb in the rxrpc input handler into the Rx ring rather than getting an additional ref for this and then dropping the original ref at the end. Signed-off-by: David Howells --- net/rxrpc/input.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 35b1a9368d80..140cede77655 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -422,7 +422,8 @@ static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq, } /* - * Process a DATA packet, adding the packet to the Rx ring. + * Process a DATA packet, adding the packet to the Rx ring. The caller's + * packet ref must be passed on or discarded. 
*/ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) { @@ -441,8 +442,10 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) sp->hdr.serial, seq0, sp->hdr.flags, sp->nr_subpackets); state = READ_ONCE(call->state); - if (state >= RXRPC_CALL_COMPLETE) + if (state >= RXRPC_CALL_COMPLETE) { + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); return; + } if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) { unsigned long timo = READ_ONCE(call->next_req_timo); @@ -555,7 +558,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window() * and also rxrpc_fill_out_ack(). */ - rxrpc_get_skb(skb, rxrpc_skb_rx_got); + if (!terminal) + rxrpc_get_skb(skb, rxrpc_skb_rx_got); call->rxtx_annotations[ix] = annotation; smp_wmb(); call->rxtx_buffer[ix] = skb; @@ -616,6 +620,7 @@ ack: unlock: spin_unlock(&call->input_lock); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); _leave(" [queued]"); } @@ -1024,7 +1029,7 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_DATA: rxrpc_input_data(call, skb); - break; + goto no_free; case RXRPC_PACKET_TYPE_ACK: rxrpc_input_ack(call, skb); @@ -1051,6 +1056,8 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, break; } + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); +no_free: _leave(""); } @@ -1375,8 +1382,11 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) mutex_unlock(&call->user_mutex); } + /* Process a call packet; this either discards or passes on the ref + * elsewhere. 
+ */ rxrpc_input_call_packet(call, skb); - goto discard; + goto out; discard: rxrpc_free_skb(skb, rxrpc_skb_rx_freed); -- cgit v1.2.1 From a641fd00d05a5ae38c5a3d50d4da10283b15287b Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 Aug 2019 09:25:37 +0100 Subject: rxrpc: Abstract out rxtx ring cleanup Abstract out rxtx ring cleanup into its own function from its two callers. This makes it easier to apply the same changes to both. Signed-off-by: David Howells --- net/rxrpc/call_object.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 217b12be9e08..c9ab2da957fe 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -421,6 +421,21 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) trace_rxrpc_call(call, op, n, here, NULL); } +/* + * Clean up the RxTx skb ring. + */ +static void rxrpc_cleanup_ring(struct rxrpc_call *call) +{ + int i; + + for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { + rxrpc_free_skb(call->rxtx_buffer[i], + (call->tx_phase ? rxrpc_skb_tx_cleaned : + rxrpc_skb_rx_cleaned)); + call->rxtx_buffer[i] = NULL; + } +} + /* * Detach a call from its owning socket. */ @@ -429,7 +444,6 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) const void *here = __builtin_return_address(0); struct rxrpc_connection *conn = call->conn; bool put = false; - int i; _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); @@ -479,13 +493,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) if (conn) rxrpc_disconnect_call(call); - for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { - rxrpc_free_skb(call->rxtx_buffer[i], - (call->tx_phase ? 
rxrpc_skb_tx_cleaned : - rxrpc_skb_rx_cleaned)); - call->rxtx_buffer[i] = NULL; - } - + rxrpc_cleanup_ring(call); _leave(""); } @@ -568,8 +576,6 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) */ void rxrpc_cleanup_call(struct rxrpc_call *call) { - int i; - _net("DESTROY CALL %d", call->debug_id); memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); @@ -580,12 +586,7 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); ASSERTCMP(call->conn, ==, NULL); - /* Clean up the Rx/Tx buffer */ - for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) - rxrpc_free_skb(call->rxtx_buffer[i], - (call->tx_phase ? rxrpc_skb_tx_cleaned : - rxrpc_skb_rx_cleaned)); - + rxrpc_cleanup_ring(call); rxrpc_free_skb(call->tx_pending, rxrpc_skb_tx_cleaned); call_rcu(&call->rcu, rxrpc_rcu_destroy_call); -- cgit v1.2.1 From b311e68420aa52098591988d0d6868b0b7463c0f Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 Aug 2019 09:25:37 +0100 Subject: rxrpc: Add a private skb flag to indicate transmission-phase skbs Add a flag in the private data on an skbuff to indicate that this is a transmission-phase buffer rather than a receive-phase buffer. 
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/sendmsg.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 20d7907a5bc6..63d3a91ce5e9 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -188,6 +188,7 @@ struct rxrpc_skb_priv { u8 nr_subpackets; /* Number of subpackets */ u8 rx_flags; /* Received packet flags */ #define RXRPC_SKB_INCL_LAST 0x01 /* - Includes last packet */ +#define RXRPC_SKB_TX_BUFFER 0x02 /* - Is transmit buffer */ union { int remain; /* amount of space remaining for next write */ diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index bae14438f869..472dc3b7d91f 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -336,6 +336,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, if (!skb) goto maybe_error; + sp = rxrpc_skb(skb); + sp->rx_flags |= RXRPC_SKB_TX_BUFFER; rxrpc_new_skb(skb, rxrpc_skb_tx_new); _debug("ALLOC SEND %p", skb); @@ -346,7 +348,6 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, skb_reserve(skb, call->conn->security_size); skb->len += call->conn->security_size; - sp = rxrpc_skb(skb); sp->remain = chunk; if (sp->remain > skb_tailroom(skb)) sp->remain = skb_tailroom(skb); -- cgit v1.2.1 From 987db9f7cd1e77e611b770a569068c43949aa6fd Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 Aug 2019 09:25:38 +0100 Subject: rxrpc: Use the tx-phase skb flag to simplify tracing Use the previously-added transmit-phase skbuff private flag to simplify the socket buffer tracing a bit. Which phase the skbuff comes from can now be divined from the skb rather than having to be guessed from the call state. We can also reduce the number of rxrpc_skb_trace values by eliminating the difference between Tx and Rx in the symbols. 
Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 51 +++++++++++++++++++------------------------- net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_event.c | 8 +++---- net/rxrpc/call_object.c | 6 ++---- net/rxrpc/conn_event.c | 6 +++--- net/rxrpc/input.c | 22 +++++++++---------- net/rxrpc/local_event.c | 4 ++-- net/rxrpc/output.c | 6 +++--- net/rxrpc/peer_event.c | 10 ++++----- net/rxrpc/recvmsg.c | 6 +++--- net/rxrpc/sendmsg.c | 10 ++++----- net/rxrpc/skbuff.c | 15 +++++++------ 12 files changed, 69 insertions(+), 76 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index fa06b528c73c..e2356c51883b 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -23,20 +23,15 @@ #define __RXRPC_DECLARE_TRACE_ENUMS_ONCE_ONLY enum rxrpc_skb_trace { - rxrpc_skb_rx_cleaned, - rxrpc_skb_rx_freed, - rxrpc_skb_rx_got, - rxrpc_skb_rx_lost, - rxrpc_skb_rx_purged, - rxrpc_skb_rx_received, - rxrpc_skb_rx_rotated, - rxrpc_skb_rx_seen, - rxrpc_skb_tx_cleaned, - rxrpc_skb_tx_freed, - rxrpc_skb_tx_got, - rxrpc_skb_tx_new, - rxrpc_skb_tx_rotated, - rxrpc_skb_tx_seen, + rxrpc_skb_cleaned, + rxrpc_skb_freed, + rxrpc_skb_got, + rxrpc_skb_lost, + rxrpc_skb_new, + rxrpc_skb_purged, + rxrpc_skb_received, + rxrpc_skb_rotated, + rxrpc_skb_seen, }; enum rxrpc_local_trace { @@ -228,20 +223,15 @@ enum rxrpc_tx_point { * Declare tracing information enums and their string mappings for display. 
*/ #define rxrpc_skb_traces \ - EM(rxrpc_skb_rx_cleaned, "Rx CLN") \ - EM(rxrpc_skb_rx_freed, "Rx FRE") \ - EM(rxrpc_skb_rx_got, "Rx GOT") \ - EM(rxrpc_skb_rx_lost, "Rx *L*") \ - EM(rxrpc_skb_rx_purged, "Rx PUR") \ - EM(rxrpc_skb_rx_received, "Rx RCV") \ - EM(rxrpc_skb_rx_rotated, "Rx ROT") \ - EM(rxrpc_skb_rx_seen, "Rx SEE") \ - EM(rxrpc_skb_tx_cleaned, "Tx CLN") \ - EM(rxrpc_skb_tx_freed, "Tx FRE") \ - EM(rxrpc_skb_tx_got, "Tx GOT") \ - EM(rxrpc_skb_tx_new, "Tx NEW") \ - EM(rxrpc_skb_tx_rotated, "Tx ROT") \ - E_(rxrpc_skb_tx_seen, "Tx SEE") + EM(rxrpc_skb_cleaned, "CLN") \ + EM(rxrpc_skb_freed, "FRE") \ + EM(rxrpc_skb_got, "GOT") \ + EM(rxrpc_skb_lost, "*L*") \ + EM(rxrpc_skb_new, "NEW") \ + EM(rxrpc_skb_purged, "PUR") \ + EM(rxrpc_skb_received, "RCV") \ + EM(rxrpc_skb_rotated, "ROT") \ + E_(rxrpc_skb_seen, "SEE") #define rxrpc_local_traces \ EM(rxrpc_local_got, "GOT") \ @@ -650,6 +640,7 @@ TRACE_EVENT(rxrpc_skb, TP_STRUCT__entry( __field(struct sk_buff *, skb ) __field(enum rxrpc_skb_trace, op ) + __field(u8, flags ) __field(int, usage ) __field(int, mod_count ) __field(const void *, where ) @@ -657,14 +648,16 @@ TRACE_EVENT(rxrpc_skb, TP_fast_assign( __entry->skb = skb; + __entry->flags = rxrpc_skb(skb)->rx_flags; __entry->op = op; __entry->usage = usage; __entry->mod_count = mod_count; __entry->where = where; ), - TP_printk("s=%p %s u=%d m=%d p=%pSR", + TP_printk("s=%p %cx %s u=%d m=%d p=%pSR", __entry->skb, + __entry->flags & RXRPC_SKB_TX_BUFFER ? 
'T' : 'R', __print_symbolic(__entry->op, rxrpc_skb_traces), __entry->usage, __entry->mod_count, diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 63d3a91ce5e9..2d5294f3e62f 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -185,6 +185,7 @@ struct rxrpc_host_header { * - max 48 bytes (struct sk_buff::cb) */ struct rxrpc_skb_priv { + atomic_t nr_ring_pins; /* Number of rxtx ring pins */ u8 nr_subpackets; /* Number of subpackets */ u8 rx_flags; /* Received packet flags */ #define RXRPC_SKB_INCL_LAST 0x01 /* - Includes last packet */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index c767679bfa5d..cedbbb3a7c2e 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -199,7 +199,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) continue; skb = call->rxtx_buffer[ix]; - rxrpc_see_skb(skb, rxrpc_skb_tx_seen); + rxrpc_see_skb(skb, rxrpc_skb_seen); if (anno_type == RXRPC_TX_ANNO_UNACK) { if (ktime_after(skb->tstamp, max_age)) { @@ -255,18 +255,18 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) continue; skb = call->rxtx_buffer[ix]; - rxrpc_get_skb(skb, rxrpc_skb_tx_got); + rxrpc_get_skb(skb, rxrpc_skb_got); spin_unlock_bh(&call->lock); if (rxrpc_send_data_packet(call, skb, true) < 0) { - rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); return; } if (rxrpc_is_client_call(call)) rxrpc_expose_client_call(call); - rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); spin_lock_bh(&call->lock); /* We need to clear the retransmit state, but there are two diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index c9ab2da957fe..014548c259ce 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -429,9 +429,7 @@ static void rxrpc_cleanup_ring(struct rxrpc_call *call) int i; for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { - rxrpc_free_skb(call->rxtx_buffer[i], - (call->tx_phase ? 
rxrpc_skb_tx_cleaned : - rxrpc_skb_rx_cleaned)); + rxrpc_free_skb(call->rxtx_buffer[i], rxrpc_skb_cleaned); call->rxtx_buffer[i] = NULL; } } @@ -587,7 +585,7 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) ASSERTCMP(call->conn, ==, NULL); rxrpc_cleanup_ring(call); - rxrpc_free_skb(call->tx_pending, rxrpc_skb_tx_cleaned); + rxrpc_free_skb(call->tx_pending, rxrpc_skb_cleaned); call_rcu(&call->rcu, rxrpc_rcu_destroy_call); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index df6624c140be..a1ceef4f5cd0 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -472,7 +472,7 @@ void rxrpc_process_connection(struct work_struct *work) /* go through the conn-level event packets, releasing the ref on this * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { - rxrpc_see_skb(skb, rxrpc_skb_rx_seen); + rxrpc_see_skb(skb, rxrpc_skb_seen); ret = rxrpc_process_event(conn, skb, &abort_code); switch (ret) { case -EPROTO: @@ -484,7 +484,7 @@ void rxrpc_process_connection(struct work_struct *work) goto requeue_and_leave; case -ECONNABORTED: default: - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); break; } } @@ -501,6 +501,6 @@ requeue_and_leave: protocol_error: if (rxrpc_abort_connection(conn, ret, abort_code) < 0) goto requeue_and_leave; - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); goto out; } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 140cede77655..31090bdf1fae 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -233,7 +233,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; annotation = call->rxtx_annotations[ix]; - rxrpc_see_skb(skb, rxrpc_skb_tx_rotated); + rxrpc_see_skb(skb, rxrpc_skb_rotated); call->rxtx_buffer[ix] = NULL; call->rxtx_annotations[ix] = 0; skb->next = list; @@ -258,7 +258,7 @@ static 
bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, skb = list; list = skb->next; skb_mark_not_on_list(skb); - rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); } return rot_last; @@ -443,7 +443,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) state = READ_ONCE(call->state); if (state >= RXRPC_CALL_COMPLETE) { - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); return; } @@ -559,7 +559,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) * and also rxrpc_fill_out_ack(). */ if (!terminal) - rxrpc_get_skb(skb, rxrpc_skb_rx_got); + rxrpc_get_skb(skb, rxrpc_skb_got); call->rxtx_annotations[ix] = annotation; smp_wmb(); call->rxtx_buffer[ix] = skb; @@ -620,7 +620,7 @@ ack: unlock: spin_unlock(&call->input_lock); - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); _leave(" [queued]"); } @@ -1056,7 +1056,7 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, break; } - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); no_free: _leave(""); } @@ -1119,7 +1119,7 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local, skb_queue_tail(&local->event_queue, skb); rxrpc_queue_local(local); } else { - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); } } @@ -1134,7 +1134,7 @@ static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) skb_queue_tail(&local->reject_queue, skb); rxrpc_queue_local(local); } else { - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); } } @@ -1198,7 +1198,7 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) if (skb->tstamp == 0) skb->tstamp = ktime_get_real(); - rxrpc_new_skb(skb, rxrpc_skb_rx_received); + rxrpc_new_skb(skb, rxrpc_skb_received); skb_pull(skb, sizeof(struct udphdr)); @@ -1215,7 +1215,7 @@ int rxrpc_input_packet(struct sock 
*udp_sk, struct sk_buff *skb) static int lose; if ((lose++ & 7) == 7) { trace_rxrpc_rx_lose(sp); - rxrpc_free_skb(skb, rxrpc_skb_rx_lost); + rxrpc_free_skb(skb, rxrpc_skb_lost); return 0; } } @@ -1389,7 +1389,7 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) goto out; discard: - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); out: trace_rxrpc_rx_done(0, 0); return 0; diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index e93a78f7c05e..3ce6d628cd75 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -90,7 +90,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local) if (skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - rxrpc_see_skb(skb, rxrpc_skb_rx_seen); + rxrpc_see_skb(skb, rxrpc_skb_seen); _debug("{%d},{%u}", local->debug_id, sp->hdr.type); switch (sp->hdr.type) { @@ -108,7 +108,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local) break; } - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); } _leave(""); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 369e516c4bdf..935bb60fff56 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -565,7 +565,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) memset(&whdr, 0, sizeof(whdr)); while ((skb = skb_dequeue(&local->reject_queue))) { - rxrpc_see_skb(skb, rxrpc_skb_rx_seen); + rxrpc_see_skb(skb, rxrpc_skb_seen); sp = rxrpc_skb(skb); switch (skb->mark) { @@ -581,7 +581,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) ioc = 2; break; default: - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); continue; } @@ -606,7 +606,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) rxrpc_tx_point_reject); } - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); } _leave(""); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 7666ec72d37e..c97ebdc043e4 100644 --- a/net/rxrpc/peer_event.c 
+++ b/net/rxrpc/peer_event.c @@ -163,11 +163,11 @@ void rxrpc_error_report(struct sock *sk) _leave("UDP socket errqueue empty"); return; } - rxrpc_new_skb(skb, rxrpc_skb_rx_received); + rxrpc_new_skb(skb, rxrpc_skb_received); serr = SKB_EXT_ERR(skb); if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { _leave("UDP empty message"); - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); return; } @@ -177,7 +177,7 @@ void rxrpc_error_report(struct sock *sk) peer = NULL; if (!peer) { rcu_read_unlock(); - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); _leave(" [no peer]"); return; } @@ -189,7 +189,7 @@ void rxrpc_error_report(struct sock *sk) serr->ee.ee_code == ICMP_FRAG_NEEDED)) { rxrpc_adjust_mtu(peer, serr); rcu_read_unlock(); - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); rxrpc_put_peer(peer); _leave(" [MTU update]"); return; @@ -197,7 +197,7 @@ void rxrpc_error_report(struct sock *sk) rxrpc_store_error(peer, serr); rcu_read_unlock(); - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); rxrpc_put_peer(peer); _leave(""); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index e49eacfaf4d6..3b0becb12041 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -190,7 +190,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) hard_ack++; ix = hard_ack & RXRPC_RXTX_BUFF_MASK; skb = call->rxtx_buffer[ix]; - rxrpc_see_skb(skb, rxrpc_skb_rx_rotated); + rxrpc_see_skb(skb, rxrpc_skb_rotated); sp = rxrpc_skb(skb); subpacket = call->rxtx_annotations[ix] & RXRPC_RX_ANNO_SUBPACKET; @@ -205,7 +205,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) /* Barrier against rxrpc_input_data(). 
*/ smp_store_release(&call->rx_hard_ack, hard_ack); - rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); if (last) { @@ -340,7 +340,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, break; } smp_rmb(); - rxrpc_see_skb(skb, rxrpc_skb_rx_seen); + rxrpc_see_skb(skb, rxrpc_skb_seen); sp = rxrpc_skb(skb); if (!(flags & MSG_PEEK)) { diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 472dc3b7d91f..6a1547b270fe 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -176,7 +176,7 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, skb->tstamp = ktime_get_real(); ix = seq & RXRPC_RXTX_BUFF_MASK; - rxrpc_get_skb(skb, rxrpc_skb_tx_got); + rxrpc_get_skb(skb, rxrpc_skb_got); call->rxtx_annotations[ix] = annotation; smp_wmb(); call->rxtx_buffer[ix] = skb; @@ -248,7 +248,7 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, } out: - rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); _leave(" = %d", ret); return ret; } @@ -289,7 +289,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, skb = call->tx_pending; call->tx_pending = NULL; - rxrpc_see_skb(skb, rxrpc_skb_tx_seen); + rxrpc_see_skb(skb, rxrpc_skb_seen); copied = 0; do { @@ -338,7 +338,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, sp = rxrpc_skb(skb); sp->rx_flags |= RXRPC_SKB_TX_BUFFER; - rxrpc_new_skb(skb, rxrpc_skb_tx_new); + rxrpc_new_skb(skb, rxrpc_skb_new); _debug("ALLOC SEND %p", skb); @@ -440,7 +440,7 @@ out: return ret; call_terminated: - rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + rxrpc_free_skb(skb, rxrpc_skb_freed); _leave(" = %d", call->error); return call->error; diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 9ad5045b7c2f..8e6f45f84b9b 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -14,7 +14,8 @@ #include #include "ar-internal.h" -#define 
select_skb_count(op) (op >= rxrpc_skb_tx_cleaned ? &rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs) +#define is_tx_skb(skb) (rxrpc_skb(skb)->rx_flags & RXRPC_SKB_TX_BUFFER) +#define select_skb_count(skb) (is_tx_skb(skb) ? &rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs) /* * Note the allocation or reception of a socket buffer. @@ -22,7 +23,7 @@ void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); - int n = atomic_inc_return(select_skb_count(op)); + int n = atomic_inc_return(select_skb_count(skb)); trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); } @@ -33,7 +34,7 @@ void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); if (skb) { - int n = atomic_read(select_skb_count(op)); + int n = atomic_read(select_skb_count(skb)); trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); } } @@ -44,7 +45,7 @@ void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); - int n = atomic_inc_return(select_skb_count(op)); + int n = atomic_inc_return(select_skb_count(skb)); trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); skb_get(skb); } @@ -58,7 +59,7 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) if (skb) { int n; CHECK_SLAB_OKAY(&skb->users); - n = atomic_dec_return(select_skb_count(op)); + n = atomic_dec_return(select_skb_count(skb)); trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); kfree_skb(skb); } @@ -72,8 +73,8 @@ void rxrpc_purge_queue(struct sk_buff_head *list) const void *here = __builtin_return_address(0); struct sk_buff *skb; while ((skb = skb_dequeue((list))) != NULL) { - int n = atomic_dec_return(select_skb_count(rxrpc_skb_rx_purged)); - trace_rxrpc_skb(skb, rxrpc_skb_rx_purged, + int n = atomic_dec_return(select_skb_count(skb)); + trace_rxrpc_skb(skb, rxrpc_skb_purged, 
refcount_read(&skb->users), n, here); kfree_skb(skb); } -- cgit v1.2.1 From d0d5c0cd1e711c98703f3544c1e6fc1372898de5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 27 Aug 2019 10:13:46 +0100 Subject: rxrpc: Use skb_unshare() rather than skb_cow_data() The in-place decryption routines in AF_RXRPC's rxkad security module currently call skb_cow_data() to make sure the data isn't shared and that the skb can be written over. This has a problem, however, as the softirq handler may be still holding a ref or the Rx ring may be holding multiple refs when skb_cow_data() is called in rxkad_verify_packet() - and so skb_shared() returns true and __pskb_pull_tail() dislikes that. If this occurs, something like the following report will be generated. kernel BUG at net/core/skbuff.c:1463! ... RIP: 0010:pskb_expand_head+0x253/0x2b0 ... Call Trace: __pskb_pull_tail+0x49/0x460 skb_cow_data+0x6f/0x300 rxkad_verify_packet+0x18b/0xb10 [rxrpc] rxrpc_recvmsg_data.isra.11+0x4a8/0xa10 [rxrpc] rxrpc_kernel_recv_data+0x126/0x240 [rxrpc] afs_extract_data+0x51/0x2d0 [kafs] afs_deliver_fs_fetch_data+0x188/0x400 [kafs] afs_deliver_to_call+0xac/0x430 [kafs] afs_wait_for_call_to_complete+0x22f/0x3d0 [kafs] afs_make_call+0x282/0x3f0 [kafs] afs_fs_fetch_data+0x164/0x300 [kafs] afs_fetch_data+0x54/0x130 [kafs] afs_readpages+0x20d/0x340 [kafs] read_pages+0x66/0x180 __do_page_cache_readahead+0x188/0x1a0 ondemand_readahead+0x17d/0x2e0 generic_file_read_iter+0x740/0xc10 __vfs_read+0x145/0x1a0 vfs_read+0x8c/0x140 ksys_read+0x4a/0xb0 do_syscall_64+0x43/0xf0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fix this by using skb_unshare() instead in the input path for DATA packets that have a security index != 0. Non-DATA packets don't need in-place encryption and neither do unencrypted DATA packets. 
Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Reported-by: Julian Wollrath Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 12 ++++++++---- net/rxrpc/ar-internal.h | 1 + net/rxrpc/input.c | 18 ++++++++++++++++++ net/rxrpc/rxkad.c | 32 +++++++++----------------------- net/rxrpc/skbuff.c | 25 ++++++++++++++++++++----- 5 files changed, 56 insertions(+), 32 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index e2356c51883b..a13a62db3565 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -32,6 +32,8 @@ enum rxrpc_skb_trace { rxrpc_skb_received, rxrpc_skb_rotated, rxrpc_skb_seen, + rxrpc_skb_unshared, + rxrpc_skb_unshared_nomem, }; enum rxrpc_local_trace { @@ -231,7 +233,9 @@ enum rxrpc_tx_point { EM(rxrpc_skb_purged, "PUR") \ EM(rxrpc_skb_received, "RCV") \ EM(rxrpc_skb_rotated, "ROT") \ - E_(rxrpc_skb_seen, "SEE") + EM(rxrpc_skb_seen, "SEE") \ + EM(rxrpc_skb_unshared, "UNS") \ + E_(rxrpc_skb_unshared_nomem, "US0") #define rxrpc_local_traces \ EM(rxrpc_local_got, "GOT") \ @@ -633,9 +637,9 @@ TRACE_EVENT(rxrpc_call, TRACE_EVENT(rxrpc_skb, TP_PROTO(struct sk_buff *skb, enum rxrpc_skb_trace op, - int usage, int mod_count, const void *where), + int usage, int mod_count, u8 flags, const void *where), - TP_ARGS(skb, op, usage, mod_count, where), + TP_ARGS(skb, op, usage, mod_count, flags, where), TP_STRUCT__entry( __field(struct sk_buff *, skb ) @@ -648,7 +652,7 @@ TRACE_EVENT(rxrpc_skb, TP_fast_assign( __entry->skb = skb; - __entry->flags = rxrpc_skb(skb)->rx_flags; + __entry->flags = flags; __entry->op = op; __entry->usage = usage; __entry->mod_count = mod_count; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 2d5294f3e62f..852e58781fda 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -1110,6 +1110,7 @@ void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); void rxrpc_packet_destructor(struct sk_buff *); void 
rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_eaten_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_purge_queue(struct sk_buff_head *); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 31090bdf1fae..d122c53c8697 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1249,6 +1249,24 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) goto bad_message; if (!rxrpc_validate_data(skb)) goto bad_message; + + /* Unshare the packet so that it can be modified for in-place + * decryption. + */ + if (sp->hdr.securityIndex != 0) { + struct sk_buff *nskb = skb_unshare(skb, GFP_ATOMIC); + if (!nskb) { + rxrpc_eaten_skb(skb, rxrpc_skb_unshared_nomem); + goto out; + } + + if (nskb != skb) { + rxrpc_eaten_skb(skb, rxrpc_skb_received); + rxrpc_new_skb(skb, rxrpc_skb_unshared); + skb = nskb; + sp = rxrpc_skb(skb); + } + } break; case RXRPC_PACKET_TYPE_CHALLENGE: diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index ae8cd8926456..c60c520fde7c 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -187,10 +187,8 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, struct rxrpc_skb_priv *sp; struct rxrpc_crypt iv; struct scatterlist sg[16]; - struct sk_buff *trailer; unsigned int len; u16 check; - int nsg; int err; sp = rxrpc_skb(skb); @@ -214,15 +212,14 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, crypto_skcipher_encrypt(req); /* we want to encrypt the skbuff in-place */ - nsg = skb_cow_data(skb, 0, &trailer); - err = -ENOMEM; - if (nsg < 0 || nsg > 16) + err = -EMSGSIZE; + if (skb_shinfo(skb)->nr_frags > 16) goto out; len = data_size + call->conn->size_align - 1; len &= ~(call->conn->size_align - 1); - sg_init_table(sg, nsg); + sg_init_table(sg, ARRAY_SIZE(sg)); err = skb_to_sgvec(skb, sg, 0, len); 
if (unlikely(err < 0)) goto out; @@ -319,11 +316,10 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, struct rxkad_level1_hdr sechdr; struct rxrpc_crypt iv; struct scatterlist sg[16]; - struct sk_buff *trailer; bool aborted; u32 data_size, buf; u16 check; - int nsg, ret; + int ret; _enter(""); @@ -336,11 +332,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, /* Decrypt the skbuff in-place. TODO: We really want to decrypt * directly into the target buffer. */ - nsg = skb_cow_data(skb, 0, &trailer); - if (nsg < 0 || nsg > 16) - goto nomem; - - sg_init_table(sg, nsg); + sg_init_table(sg, ARRAY_SIZE(sg)); ret = skb_to_sgvec(skb, sg, offset, 8); if (unlikely(ret < 0)) return ret; @@ -388,10 +380,6 @@ protocol_error: if (aborted) rxrpc_send_abort_packet(call); return -EPROTO; - -nomem: - _leave(" = -ENOMEM"); - return -ENOMEM; } /* @@ -406,7 +394,6 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, struct rxkad_level2_hdr sechdr; struct rxrpc_crypt iv; struct scatterlist _sg[4], *sg; - struct sk_buff *trailer; bool aborted; u32 data_size, buf; u16 check; @@ -423,12 +410,11 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, /* Decrypt the skbuff in-place. TODO: We really want to decrypt * directly into the target buffer. 
*/ - nsg = skb_cow_data(skb, 0, &trailer); - if (nsg < 0) - goto nomem; - sg = _sg; - if (unlikely(nsg > 4)) { + nsg = skb_shinfo(skb)->nr_frags; + if (nsg <= 4) { + nsg = 4; + } else { sg = kmalloc_array(nsg, sizeof(*sg), GFP_NOIO); if (!sg) goto nomem; diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 8e6f45f84b9b..0348d2bf6f7d 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -24,7 +24,8 @@ void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); int n = atomic_inc_return(select_skb_count(skb)); - trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); + trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, + rxrpc_skb(skb)->rx_flags, here); } /* @@ -35,7 +36,8 @@ void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) const void *here = __builtin_return_address(0); if (skb) { int n = atomic_read(select_skb_count(skb)); - trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); + trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, + rxrpc_skb(skb)->rx_flags, here); } } @@ -46,10 +48,21 @@ void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { const void *here = __builtin_return_address(0); int n = atomic_inc_return(select_skb_count(skb)); - trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); + trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, + rxrpc_skb(skb)->rx_flags, here); skb_get(skb); } +/* + * Note the dropping of a ref on a socket buffer by the core. + */ +void rxrpc_eaten_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +{ + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(&rxrpc_n_rx_skbs); + trace_rxrpc_skb(skb, op, 0, n, 0, here); +} + /* * Note the destruction of a socket buffer. 
*/ @@ -60,7 +73,8 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) int n; CHECK_SLAB_OKAY(&skb->users); n = atomic_dec_return(select_skb_count(skb)); - trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); + trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, + rxrpc_skb(skb)->rx_flags, here); kfree_skb(skb); } } @@ -75,7 +89,8 @@ void rxrpc_purge_queue(struct sk_buff_head *list) while ((skb = skb_dequeue((list))) != NULL) { int n = atomic_dec_return(select_skb_count(skb)); trace_rxrpc_skb(skb, rxrpc_skb_purged, - refcount_read(&skb->users), n, here); + refcount_read(&skb->users), n, + rxrpc_skb(skb)->rx_flags, here); kfree_skb(skb); } } -- cgit v1.2.1 From 3cf2f450fff304be9cf4868bf0df17f253bc5b1c Mon Sep 17 00:00:00 2001 From: Todd Seidelmann Date: Wed, 21 Aug 2019 11:47:53 -0400 Subject: netfilter: xt_physdev: Fix spurious error message in physdev_mt_check Simplify the check in physdev_mt_check() to emit an error message only when passed an invalid chain (ie, NF_INET_LOCAL_OUT). This avoids cluttering up the log with errors against valid rules. For large/heavily modified rulesets, current behavior can quickly overwhelm the ring buffer, because this function gets called on every change, regardless of the rule that was changed. 
Signed-off-by: Todd Seidelmann Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_physdev.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index ead7c6022208..b92b22ce8abd 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -101,11 +101,9 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) if (info->bitmask & (XT_PHYSDEV_OP_OUT | XT_PHYSDEV_OP_ISOUT) && (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || info->invert & XT_PHYSDEV_OP_BRIDGED) && - par->hook_mask & ((1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) { + par->hook_mask & (1 << NF_INET_LOCAL_OUT)) { pr_info_ratelimited("--physdev-out and --physdev-is-out only supported in the FORWARD and POSTROUTING chains with bridged traffic\n"); - if (par->hook_mask & (1 << NF_INET_LOCAL_OUT)) - return -EINVAL; + return -EINVAL; } if (!brnf_probed) { -- cgit v1.2.1 From 3a069024d371125227de3ac8fa74223fcf473520 Mon Sep 17 00:00:00 2001 From: Thomas Jarosch Date: Wed, 21 Aug 2019 16:14:28 +0200 Subject: netfilter: nf_conntrack_ftp: Fix debug output The find_pattern() debug output was printing the 'skip' character. This can be a NULL-byte and messes up further pr_debug() output. Output without the fix: kernel: nf_conntrack_ftp: Pattern matches! kernel: nf_conntrack_ftp: Skipped up to `<7>nf_conntrack_ftp: find_pattern `PORT': dlen = 8 kernel: nf_conntrack_ftp: find_pattern `EPRT': dlen = 8 Output with the fix: kernel: nf_conntrack_ftp: Pattern matches! kernel: nf_conntrack_ftp: Skipped up to 0x0 delimiter! kernel: nf_conntrack_ftp: Match succeeded! 
kernel: nf_conntrack_ftp: conntrack_ftp: match `172,17,0,100,200,207' (20 bytes at 4150681645) kernel: nf_conntrack_ftp: find_pattern `PORT': dlen = 8 Signed-off-by: Thomas Jarosch Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_ftp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 0ecb3e289ef2..8d96738b7dfd 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -322,7 +322,7 @@ static int find_pattern(const char *data, size_t dlen, i++; } - pr_debug("Skipped up to `%c'!\n", skip); + pr_debug("Skipped up to 0x%hhx delimiter!\n", skip); *numoff = i; *numlen = getnum(data + i, dlen - i, cmd, term, numoff); -- cgit v1.2.1 From 4d82fa67dd6b0e2635ae9dad44fbf3d747eca9ed Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 27 Aug 2019 07:39:50 +0100 Subject: mfd: rk808: Make PM function declaration static Avoids: ../drivers/mfd/rk808.c:771:1: warning: symbol 'rk8xx_pm_ops' \ was not declared. Should it be static? Fixes: 5752bc4373b2 ("mfd: rk808: Mark pm functions __maybe_unused") Reviewed-by: Arnd Bergmann Signed-off-by: Lee Jones --- drivers/mfd/rk808.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/rk808.c b/drivers/mfd/rk808.c index 9a9e6315ba46..050478cabc95 100644 --- a/drivers/mfd/rk808.c +++ b/drivers/mfd/rk808.c @@ -768,7 +768,7 @@ static int __maybe_unused rk8xx_resume(struct device *dev) return ret; } -SIMPLE_DEV_PM_OPS(rk8xx_pm_ops, rk8xx_suspend, rk8xx_resume); +static SIMPLE_DEV_PM_OPS(rk8xx_pm_ops, rk8xx_suspend, rk8xx_resume); static struct i2c_driver rk808_i2c_driver = { .driver = { -- cgit v1.2.1 From 71affe9be45a5c60b9772e1b2701710712637274 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 26 Aug 2019 20:41:16 -0400 Subject: NFSv2: Fix eof handling If we received a reply from the server with a zero length read and no error, then that implies we are at eof. 
Signed-off-by: Trond Myklebust --- fs/nfs/proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 5552fa8b6e12..ec79d2214a78 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -594,7 +594,8 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) /* Emulate the eof flag, which isn't normally needed in NFSv2 * as it is guaranteed to always return the file attributes */ - if (hdr->args.offset + hdr->res.count >= hdr->res.fattr->size) + if ((hdr->res.count == 0 && hdr->args.count > 0) || + hdr->args.offset + hdr->res.count >= hdr->res.fattr->size) hdr->res.eof = 1; } return 0; -- cgit v1.2.1 From d33d4beb522987d1c305c12500796f9be3687dee Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 27 Aug 2019 07:03:28 -0400 Subject: NFSv2: Fix write regression Ensure we update the write result count on success, since the RPC call itself does not do so. Reported-by: Jan Stancek Reported-by: Naresh Kamboju Signed-off-by: Trond Myklebust Tested-by: Jan Stancek --- fs/nfs/proc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index ec79d2214a78..0f7288b94633 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -616,8 +616,10 @@ static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - if (task->tk_status >= 0) + if (task->tk_status >= 0) { + hdr->res.count = hdr->args.count; nfs_writeback_update_inode(hdr); + } return 0; } -- cgit v1.2.1 From 99300a85260c2b7febd57082a617d1062532067e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 27 Aug 2019 15:16:36 +0800 Subject: NFS: remove set but not used variable 'mapping' Fixes gcc '-Wunused-but-set-variable' warning: fs/nfs/write.c: In function nfs_page_async_flush: fs/nfs/write.c:609:24: warning: variable mapping set but not used [-Wunused-but-set-variable] It is not use since commit aefb623c422e ("NFS: Fix writepage(s) 
error handling to not report errors twice") Reported-by: Hulk Robot Signed-off-by: YueHaibing Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d193042fa228..85ca49549b39 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -606,7 +606,6 @@ static void nfs_write_error(struct nfs_page *req, int error) static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, struct page *page) { - struct address_space *mapping; struct nfs_page *req; int ret = 0; @@ -621,7 +620,6 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags)); /* If there is a fatal error that covers this write, just exit */ - mapping = page_file_mapping(page); ret = pgio->pg_error; if (nfs_error_is_fatal_on_server(ret)) goto out_launder; -- cgit v1.2.1 From 2a1a3fa0f29270583f0e6e3100d609e09697add1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 24 Aug 2019 14:12:31 +0100 Subject: kallsyms: Don't let kallsyms_lookup_size_offset() fail on retrieving the first symbol An arm64 kernel configured with CONFIG_KPROBES=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set CONFIG_KALLSYMS_BASE_RELATIVE=y reports the following kprobe failure: [ 0.032677] kprobes: failed to populate blacklist: -22 [ 0.033376] Please take care of using kprobes. It appears that kprobe fails to retrieve the symbol at address 0xffff000010081000, despite this symbol being in System.map: ffff000010081000 T __exception_text_start This symbol is part of the first group of aliases in the kallsyms_offsets array (symbol names generated using ugly hacks in scripts/kallsyms.c): kallsyms_offsets: .long 0x1000 // do_undefinstr .long 0x1000 // efi_header_end .long 0x1000 // _stext .long 0x1000 // __exception_text_start .long 0x12b0 // do_cp15instr Looking at the implementation of get_symbol_pos(), it returns the lowest index for aliasing symbols. In this case, it return 0. 
But kallsyms_lookup_size_offset() considers 0 as a failure, which is obviously wrong (there is definitely a valid symbol living there). In turn, the kprobe blacklisting stops abruptly, hence the original error. A CONFIG_KALLSYMS_ALL kernel wouldn't fail as there is always some random symbols at the beginning of this array, which are never looked up via kallsyms_lookup_size_offset. Fix it by considering that get_symbol_pos() is always successful (which is consistent with the other uses of this function). Fixes: ffc5089196446 ("[PATCH] Create kallsyms_lookup_size_offset()") Reviewed-by: Masami Hiramatsu Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- kernel/kallsyms.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 95a260f9214b..136ce049c4ad 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -263,8 +263,10 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, { char namebuf[KSYM_NAME_LEN]; - if (is_ksym_addr(addr)) - return !!get_symbol_pos(addr, symbolsize, offset); + if (is_ksym_addr(addr)) { + get_symbol_pos(addr, symbolsize, offset); + return 1; + } return !!module_address_lookup(addr, symbolsize, offset, NULL, namebuf) || !!__bpf_address_lookup(addr, symbolsize, offset, namebuf); } -- cgit v1.2.1 From d4a8061a7c5f7c27a2dc002ee4cb89b3e6637e44 Mon Sep 17 00:00:00 2001 From: Heyi Guo Date: Tue, 27 Aug 2019 12:26:50 +0100 Subject: KVM: arm/arm64: vgic: Fix potential deadlock when ap_list is long If the ap_list is longer than 256 entries, merge_final() in list_sort() will call the comparison callback with the same element twice, causing a deadlock in vgic_irq_cmp(). Fix it by returning early when irqa == irqb. 
Cc: stable@vger.kernel.org # 4.7+ Fixes: 8e4447457965 ("KVM: arm/arm64: vgic-new: Add IRQ sorting") Signed-off-by: Zenghui Yu Signed-off-by: Heyi Guo [maz: massaged commit log and patch, added Fixes and Cc-stable] Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- virt/kvm/arm/vgic/vgic.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 04786c8ec77e..ca5e6c6866a4 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -254,6 +254,13 @@ static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b) bool penda, pendb; int ret; + /* + * list_sort may call this function with the same element when + * the list is fairly long. + */ + if (unlikely(irqa == irqb)) + return 0; + raw_spin_lock(&irqa->irq_lock); raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING); -- cgit v1.2.1 From 478553fd1b6f819390b64a2e13ac756c4d1a2836 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Aug 2019 13:24:52 +0200 Subject: netfilter: conntrack: make sysctls per-namespace again When I merged the extension sysctl tables with the main one I forgot to reset them on netns creation. They currently read/write init_net settings. 
Fixes: d912dec12428 ("netfilter: conntrack: merge acct and helper sysctl table with main one") Fixes: cb2833ed0044 ("netfilter: conntrack: merge ecache and timestamp sysctl tables with main one") Reported-by: Shmulik Ladkani Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index e0d392cb3075..0006503d2da9 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -1037,8 +1037,13 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) table[NF_SYSCTL_CT_COUNT].data = &net->ct.count; table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum; table[NF_SYSCTL_CT_LOG_INVALID].data = &net->ct.sysctl_log_invalid; + table[NF_SYSCTL_CT_ACCT].data = &net->ct.sysctl_acct; + table[NF_SYSCTL_CT_HELPER].data = &net->ct.sysctl_auto_assign_helper; #ifdef CONFIG_NF_CONNTRACK_EVENTS table[NF_SYSCTL_CT_EVENTS].data = &net->ct.sysctl_events; +#endif +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + table[NF_SYSCTL_CT_TIMESTAMP].data = &net->ct.sysctl_tstamp; #endif table[NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC].data = &nf_generic_pernet(net)->timeout; table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP].data = &nf_icmp_pernet(net)->timeout; -- cgit v1.2.1 From 83e09d5bddbee749fc83063890244397896a1971 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 19 Aug 2019 13:17:53 +0800 Subject: drm/amd/powerplay: correct Vega20 dpm level related settings Correct the settings for auto mode and skip the unnecessary settings for dcefclk and fclk. 
Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 60 +++++++++++++++++++--- 1 file changed, 54 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 81658dc8fafc..90c4e87ac5ad 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -2353,12 +2353,16 @@ static int vega20_force_dpm_highest(struct pp_hwmgr *hwmgr) data->dpm_table.soc_table.dpm_state.soft_max_level = data->dpm_table.soc_table.dpm_levels[soft_level].value; - ret = vega20_upload_dpm_min_level(hwmgr, 0xFFFFFFFF); + ret = vega20_upload_dpm_min_level(hwmgr, FEATURE_DPM_GFXCLK_MASK | + FEATURE_DPM_UCLK_MASK | + FEATURE_DPM_SOCCLK_MASK); PP_ASSERT_WITH_CODE(!ret, "Failed to upload boot level to highest!", return ret); - ret = vega20_upload_dpm_max_level(hwmgr, 0xFFFFFFFF); + ret = vega20_upload_dpm_max_level(hwmgr, FEATURE_DPM_GFXCLK_MASK | + FEATURE_DPM_UCLK_MASK | + FEATURE_DPM_SOCCLK_MASK); PP_ASSERT_WITH_CODE(!ret, "Failed to upload dpm max level to highest!", return ret); @@ -2391,12 +2395,16 @@ static int vega20_force_dpm_lowest(struct pp_hwmgr *hwmgr) data->dpm_table.soc_table.dpm_state.soft_max_level = data->dpm_table.soc_table.dpm_levels[soft_level].value; - ret = vega20_upload_dpm_min_level(hwmgr, 0xFFFFFFFF); + ret = vega20_upload_dpm_min_level(hwmgr, FEATURE_DPM_GFXCLK_MASK | + FEATURE_DPM_UCLK_MASK | + FEATURE_DPM_SOCCLK_MASK); PP_ASSERT_WITH_CODE(!ret, "Failed to upload boot level to highest!", return ret); - ret = vega20_upload_dpm_max_level(hwmgr, 0xFFFFFFFF); + ret = vega20_upload_dpm_max_level(hwmgr, FEATURE_DPM_GFXCLK_MASK | + FEATURE_DPM_UCLK_MASK | + FEATURE_DPM_SOCCLK_MASK); PP_ASSERT_WITH_CODE(!ret, "Failed to upload dpm max level to highest!", return ret); @@ -2407,14 +2415,54 @@ static int vega20_force_dpm_lowest(struct pp_hwmgr *hwmgr) 
static int vega20_unforce_dpm_levels(struct pp_hwmgr *hwmgr) { + struct vega20_hwmgr *data = + (struct vega20_hwmgr *)(hwmgr->backend); + uint32_t soft_min_level, soft_max_level; int ret = 0; - ret = vega20_upload_dpm_min_level(hwmgr, 0xFFFFFFFF); + /* gfxclk soft min/max settings */ + soft_min_level = + vega20_find_lowest_dpm_level(&(data->dpm_table.gfx_table)); + soft_max_level = + vega20_find_highest_dpm_level(&(data->dpm_table.gfx_table)); + + data->dpm_table.gfx_table.dpm_state.soft_min_level = + data->dpm_table.gfx_table.dpm_levels[soft_min_level].value; + data->dpm_table.gfx_table.dpm_state.soft_max_level = + data->dpm_table.gfx_table.dpm_levels[soft_max_level].value; + + /* uclk soft min/max settings */ + soft_min_level = + vega20_find_lowest_dpm_level(&(data->dpm_table.mem_table)); + soft_max_level = + vega20_find_highest_dpm_level(&(data->dpm_table.mem_table)); + + data->dpm_table.mem_table.dpm_state.soft_min_level = + data->dpm_table.mem_table.dpm_levels[soft_min_level].value; + data->dpm_table.mem_table.dpm_state.soft_max_level = + data->dpm_table.mem_table.dpm_levels[soft_max_level].value; + + /* socclk soft min/max settings */ + soft_min_level = + vega20_find_lowest_dpm_level(&(data->dpm_table.soc_table)); + soft_max_level = + vega20_find_highest_dpm_level(&(data->dpm_table.soc_table)); + + data->dpm_table.soc_table.dpm_state.soft_min_level = + data->dpm_table.soc_table.dpm_levels[soft_min_level].value; + data->dpm_table.soc_table.dpm_state.soft_max_level = + data->dpm_table.soc_table.dpm_levels[soft_max_level].value; + + ret = vega20_upload_dpm_min_level(hwmgr, FEATURE_DPM_GFXCLK_MASK | + FEATURE_DPM_UCLK_MASK | + FEATURE_DPM_SOCCLK_MASK); PP_ASSERT_WITH_CODE(!ret, "Failed to upload DPM Bootup Levels!", return ret); - ret = vega20_upload_dpm_max_level(hwmgr, 0xFFFFFFFF); + ret = vega20_upload_dpm_max_level(hwmgr, FEATURE_DPM_GFXCLK_MASK | + FEATURE_DPM_UCLK_MASK | + FEATURE_DPM_SOCCLK_MASK); PP_ASSERT_WITH_CODE(!ret, "Failed to upload DPM Max 
Levels!", return ret); -- cgit v1.2.1 From 317a3aaef94d73ba6be88aea11b41bb631b2d581 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 27 Aug 2019 17:33:32 +0800 Subject: drm/amdgpu: Add ATPX quirk for Dell Latitude 5495 Needs ATPX rather than _PR3 to really turn off the dGPU. This can save ~5W when dGPU is runtime-suspended. Signed-off-by: Kai-Heng Feng Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 9b384a94d2f3..3e35a8f2c5e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -574,6 +574,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = { { 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX }, + { 0x1002, 0x699f, 0x1028, 0x0814, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1025, 0x125A, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x17AA, 0x3806, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0, 0, 0, 0, 0 }, -- cgit v1.2.1 From 41940ff50f6c347f3541163702566cd526200d98 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Tue, 27 Aug 2019 22:59:45 +0800 Subject: drm/amdgpu: fix GFXOFF on Picasso and Raven2 For picasso(adev->pdev->device == 0x15d8)&raven2(adev->rev_id >= 0x8), firmware is sufficient to support gfxoff. In commit 98f58ada2d37e, the picasso&raven2 case returns directly, which leaves gfxoff disabled.
Fixes: 98f58ada2d37 ("drm/amdgpu/gfx9: update pg_flags after determining if gfx off is possible") Reviewed-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Aaron Liu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 4ea67f94cae2..c066e1d3f981 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -596,14 +596,14 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) case CHIP_VEGA20: break; case CHIP_RAVEN: - if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) - break; - if ((adev->gfx.rlc_fw_version != 106 && - adev->gfx.rlc_fw_version < 531) || - (adev->gfx.rlc_fw_version == 53815) || - (adev->gfx.rlc_feature_version < 1) || - !adev->gfx.rlc.is_rlc_v2_1) + if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) + &&((adev->gfx.rlc_fw_version != 106 && + adev->gfx.rlc_fw_version < 531) || + (adev->gfx.rlc_fw_version == 53815) || + (adev->gfx.rlc_feature_version < 1) || + !adev->gfx.rlc.is_rlc_v2_1)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + if (adev->pm.pp_feature & PP_GFXOFF_MASK) adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_CP | -- cgit v1.2.1 From ea1529873ab18c204688cf31746df851c098cbea Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 27 Aug 2019 18:04:02 +0200 Subject: KVM: x86: hyper-v: don't crash on KVM_GET_SUPPORTED_HV_CPUID when kvm_intel.nested is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If kvm_intel is loaded with nested=0 parameter an attempt to perform KVM_GET_SUPPORTED_HV_CPUID results in OOPS as nested_get_evmcs_version hook in kvm_x86_ops is NULL (we assign it in nested_vmx_hardware_setup() and this only happens in case nested is enabled). 
Check that kvm_x86_ops->nested_get_evmcs_version is not NULL before calling it. With this, we can remove the stub from svm as it is no longer needed. Cc: Fixes: e2e871ab2f02 ("x86/kvm/hyper-v: Introduce nested_get_evmcs_version() helper") Signed-off-by: Vitaly Kuznetsov Reviewed-by: Jim Mattson Signed-off-by: Radim Krčmář --- arch/x86/kvm/hyperv.c | 5 ++++- arch/x86/kvm/svm.c | 8 +------- arch/x86/kvm/vmx/vmx.c | 1 + 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index c10a8b10b203..fff790a3f4ee 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1781,7 +1781,7 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args) int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries) { - uint16_t evmcs_ver = kvm_x86_ops->nested_get_evmcs_version(vcpu); + uint16_t evmcs_ver = 0; struct kvm_cpuid_entry2 cpuid_entries[] = { { .function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS }, { .function = HYPERV_CPUID_INTERFACE }, @@ -1793,6 +1793,9 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, }; int i, nent = ARRAY_SIZE(cpuid_entries); + if (kvm_x86_ops->nested_get_evmcs_version) + evmcs_ver = kvm_x86_ops->nested_get_evmcs_version(vcpu); + /* Skip NESTED_FEATURES if eVMCS is not supported */ if (!evmcs_ver) --nent; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e3d3b2128f2b..e0368076a1ef 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -7128,12 +7128,6 @@ failed: return ret; } -static uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu) -{ - /* Not supported */ - return 0; -} - static int nested_enable_evmcs(struct kvm_vcpu *vcpu, uint16_t *vmcs_version) { @@ -7332,7 +7326,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .mem_enc_unreg_region = svm_unregister_enc_region, .nested_enable_evmcs = nested_enable_evmcs, - .nested_get_evmcs_version = 
nested_get_evmcs_version, + .nested_get_evmcs_version = NULL, .need_emulation_on_page_fault = svm_need_emulation_on_page_fault, }; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 42ed3faa6af8..c030c96fc81a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7797,6 +7797,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .set_nested_state = NULL, .get_vmcs12_pages = NULL, .nested_enable_evmcs = NULL, + .nested_get_evmcs_version = NULL, .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault, }; -- cgit v1.2.1 From 75ee23b30dc712d80d2421a9a547e7ab6e379b44 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 23 Aug 2019 13:55:44 -0700 Subject: KVM: x86: Don't update RIP or do single-step on faulting emulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't advance RIP or inject a single-step #DB if emulation signals a fault. This logic applies to all state updates that are conditional on clean retirement of the emulation instruction, e.g. updating RFLAGS was previously handled by commit 38827dbd3fb85 ("KVM: x86: Do not update EFLAGS on faulting emulation"). Not advancing RIP is likely a nop, i.e. ctxt->eip isn't updated with ctxt->_eip until emulation "retires" anyways. Skipping #DB injection fixes a bug reported by Andy Lutomirski where a #UD on SYSCALL due to invalid state with EFLAGS.TF=1 would loop indefinitely due to emulation overwriting the #UD with #DB and thus restarting the bad SYSCALL over and over. 
Cc: Nadav Amit Cc: stable@vger.kernel.org Reported-by: Andy Lutomirski Fixes: 663f4c61b803 ("KVM: x86: handle singlestep during emulation") Signed-off-by: Sean Christopherson Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 93b0bd45ac73..290c3c3efb87 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6594,12 +6594,13 @@ restart: unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); toggle_interruptibility(vcpu, ctxt->interruptibility); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; - kvm_rip_write(vcpu, ctxt->eip); - if (r == EMULATE_DONE && ctxt->tf) - kvm_vcpu_do_singlestep(vcpu, &r); if (!ctxt->have_exception || - exception_type(ctxt->exception.vector) == EXCPT_TRAP) + exception_type(ctxt->exception.vector) == EXCPT_TRAP) { + kvm_rip_write(vcpu, ctxt->eip); + if (r == EMULATE_DONE && ctxt->tf) + kvm_vcpu_do_singlestep(vcpu, &r); __kvm_set_rflags(vcpu, ctxt->eflags); + } /* * For STI, interrupts are shadowed; so KVM_REQ_EVENT will -- cgit v1.2.1 From f2aee329a68f5a907bcff11a109dfe17c0b41aeb Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 22 Aug 2019 08:09:50 +1000 Subject: cifs: set domainName when a domain-key is used in multiuser RHBZ: 1710429 When we use a domain-key to authenticate using multiuser we must also set the domain name for the new volume as it will be used and passed to the server in the NTLMSSP Domain-name.
Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/connect.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 1795e80cbdf7..9d2576f31689 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2981,6 +2981,7 @@ static int cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) { int rc = 0; + int is_domain = 0; const char *delim, *payload; char *desc; ssize_t len; @@ -3028,6 +3029,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) rc = PTR_ERR(key); goto out_err; } + is_domain = 1; } down_read(&key->sem); @@ -3085,6 +3087,26 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) goto out_key_put; } + /* + * If we have a domain key then we must set the domainName in the + * for the request. + */ + if (is_domain && ses->domainName) { + vol->domainname = kstrndup(ses->domainName, + strlen(ses->domainName), + GFP_KERNEL); + if (!vol->domainname) { + cifs_dbg(FYI, "Unable to allocate %zd bytes for " + "domain\n", len); + rc = -ENOMEM; + kfree(vol->username); + vol->username = NULL; + kfree(vol->password); + vol->password = NULL; + goto out_key_put; + } + } + out_key_put: up_read(&key->sem); key_put(key); -- cgit v1.2.1 From 478228e57f81f6cb60798d54fc02a74ea7dd267e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 27 Aug 2019 13:59:17 +0300 Subject: cifs: Use kzfree() to zero out the password It's safer to zero out the password so that it can never be disclosed. 
Fixes: 0c219f5799c7 ("cifs: set domainName when a domain-key is used in multiuser") Signed-off-by: Dan Carpenter Signed-off-by: Steve French --- fs/cifs/connect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 9d2576f31689..ddefddeffd06 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3101,7 +3101,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) rc = -ENOMEM; kfree(vol->username); vol->username = NULL; - kfree(vol->password); + kzfree(vol->password); vol->password = NULL; goto out_key_put; } -- cgit v1.2.1 From 981471bd3abf4d572097645d765391533aac327d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 25 Aug 2019 10:01:32 -0700 Subject: net_sched: fix a NULL pointer deref in ipt action The net pointer in struct xt_tgdtor_param is not explicitly initialized therefore is still NULL when dereferencing it. So we have to find a way to pass the correct net pointer to ipt_destroy_target(). The best way I find is just saving the net pointer inside the per netns struct tcf_idrinfo, which could make this patch smaller. Fixes: 0c66dc1ea3f0 ("netfilter: conntrack: register hooks in netns when needed by ruleset") Reported-and-tested-by: itugrok@yahoo.com Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- include/net/act_api.h | 4 +++- net/sched/act_bpf.c | 2 +- net/sched/act_connmark.c | 2 +- net/sched/act_csum.c | 2 +- net/sched/act_ct.c | 2 +- net/sched/act_ctinfo.c | 2 +- net/sched/act_gact.c | 2 +- net/sched/act_ife.c | 2 +- net/sched/act_ipt.c | 11 ++++++----- net/sched/act_mirred.c | 2 +- net/sched/act_mpls.c | 2 +- net/sched/act_nat.c | 2 +- net/sched/act_pedit.c | 2 +- net/sched/act_police.c | 2 +- net/sched/act_sample.c | 2 +- net/sched/act_simple.c | 2 +- net/sched/act_skbedit.c | 2 +- net/sched/act_skbmod.c | 2 +- net/sched/act_tunnel_key.c | 2 +- net/sched/act_vlan.c | 2 +- 20 files changed, 27 insertions(+), 24 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index c61a1bf4e3de..3a1a72990fce 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -15,6 +15,7 @@ struct tcf_idrinfo { struct mutex lock; struct idr action_idr; + struct net *net; }; struct tc_action_ops; @@ -108,7 +109,7 @@ struct tc_action_net { }; static inline -int tc_action_net_init(struct tc_action_net *tn, +int tc_action_net_init(struct net *net, struct tc_action_net *tn, const struct tc_action_ops *ops) { int err = 0; @@ -117,6 +118,7 @@ int tc_action_net_init(struct tc_action_net *tn, if (!tn->idrinfo) return -ENOMEM; tn->ops = ops; + tn->idrinfo->net = net; mutex_init(&tn->idrinfo->lock); idr_init(&tn->idrinfo->action_idr); return err; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index fd1f7e799e23..04b7bd4ec751 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -422,7 +422,7 @@ static __net_init int bpf_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, bpf_net_id); - return tc_action_net_init(tn, &act_bpf_ops); + return tc_action_net_init(net, tn, &act_bpf_ops); } static void __net_exit bpf_exit_net(struct list_head *net_list) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 32ac04d77a45..2b43cacf82af 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ 
-231,7 +231,7 @@ static __net_init int connmark_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, connmark_net_id); - return tc_action_net_init(tn, &act_connmark_ops); + return tc_action_net_init(net, tn, &act_connmark_ops); } static void __net_exit connmark_exit_net(struct list_head *net_list) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 9b9288267a54..d3cfad88dc3a 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -714,7 +714,7 @@ static __net_init int csum_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, csum_net_id); - return tc_action_net_init(tn, &act_csum_ops); + return tc_action_net_init(net, tn, &act_csum_ops); } static void __net_exit csum_exit_net(struct list_head *net_list) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 33a1a7406e87..cdd6f3818097 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -939,7 +939,7 @@ static __net_init int ct_init_net(struct net *net) tn->labels = true; } - return tc_action_net_init(&tn->tn, &act_ct_ops); + return tc_action_net_init(net, &tn->tn, &act_ct_ops); } static void __net_exit ct_exit_net(struct list_head *net_list) diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 06ef74b74911..0dbcfd1dca7b 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -376,7 +376,7 @@ static __net_init int ctinfo_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ctinfo_net_id); - return tc_action_net_init(tn, &act_ctinfo_ops); + return tc_action_net_init(net, tn, &act_ctinfo_ops); } static void __net_exit ctinfo_exit_net(struct list_head *net_list) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 8f0140c6ca58..324f1d1f6d47 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -278,7 +278,7 @@ static __net_init int gact_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, gact_net_id); - return tc_action_net_init(tn, &act_gact_ops); + return 
tc_action_net_init(net, tn, &act_gact_ops); } static void __net_exit gact_exit_net(struct list_head *net_list) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 92ee853d43e6..3a31e241c647 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -890,7 +890,7 @@ static __net_init int ife_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ife_net_id); - return tc_action_net_init(tn, &act_ife_ops); + return tc_action_net_init(net, tn, &act_ife_ops); } static void __net_exit ife_exit_net(struct list_head *net_list) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index ce2c30a591d2..214a03d405cf 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -61,12 +61,13 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t, return 0; } -static void ipt_destroy_target(struct xt_entry_target *t) +static void ipt_destroy_target(struct xt_entry_target *t, struct net *net) { struct xt_tgdtor_param par = { .target = t->u.kernel.target, .targinfo = t->data, .family = NFPROTO_IPV4, + .net = net, }; if (par.target->destroy != NULL) par.target->destroy(&par); @@ -78,7 +79,7 @@ static void tcf_ipt_release(struct tc_action *a) struct tcf_ipt *ipt = to_ipt(a); if (ipt->tcfi_t) { - ipt_destroy_target(ipt->tcfi_t); + ipt_destroy_target(ipt->tcfi_t, a->idrinfo->net); kfree(ipt->tcfi_t); } kfree(ipt->tcfi_tname); @@ -180,7 +181,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, spin_lock_bh(&ipt->tcf_lock); if (ret != ACT_P_CREATED) { - ipt_destroy_target(ipt->tcfi_t); + ipt_destroy_target(ipt->tcfi_t, net); kfree(ipt->tcfi_tname); kfree(ipt->tcfi_t); } @@ -350,7 +351,7 @@ static __net_init int ipt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ipt_net_id); - return tc_action_net_init(tn, &act_ipt_ops); + return tc_action_net_init(net, tn, &act_ipt_ops); } static void __net_exit ipt_exit_net(struct list_head *net_list) @@ -399,7 +400,7 @@ static __net_init int 
xt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, xt_net_id); - return tc_action_net_init(tn, &act_xt_ops); + return tc_action_net_init(net, tn, &act_xt_ops); } static void __net_exit xt_exit_net(struct list_head *net_list) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index be3f88dfc37e..9d1bf508075a 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -453,7 +453,7 @@ static __net_init int mirred_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, mirred_net_id); - return tc_action_net_init(tn, &act_mirred_ops); + return tc_action_net_init(net, tn, &act_mirred_ops); } static void __net_exit mirred_exit_net(struct list_head *net_list) diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 0f299e3b618c..e168df0e008a 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -375,7 +375,7 @@ static __net_init int mpls_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, mpls_net_id); - return tc_action_net_init(tn, &act_mpls_ops); + return tc_action_net_init(net, tn, &act_mpls_ops); } static void __net_exit mpls_exit_net(struct list_head *net_list) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 7b858c11b1b5..ea4c5359e7df 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -327,7 +327,7 @@ static __net_init int nat_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, nat_net_id); - return tc_action_net_init(tn, &act_nat_ops); + return tc_action_net_init(net, tn, &act_nat_ops); } static void __net_exit nat_exit_net(struct list_head *net_list) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 17360c6faeaa..cdfaa79382a2 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -498,7 +498,7 @@ static __net_init int pedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, pedit_net_id); - return tc_action_net_init(tn, &act_pedit_ops); + return tc_action_net_init(net, tn, 
&act_pedit_ops); } static void __net_exit pedit_exit_net(struct list_head *net_list) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 49cec3e64a4d..6315e0f8d26e 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -371,7 +371,7 @@ static __net_init int police_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, police_net_id); - return tc_action_net_init(tn, &act_police_ops); + return tc_action_net_init(net, tn, &act_police_ops); } static void __net_exit police_exit_net(struct list_head *net_list) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 595308d60133..7eff363f9f03 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -265,7 +265,7 @@ static __net_init int sample_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, sample_net_id); - return tc_action_net_init(tn, &act_sample_ops); + return tc_action_net_init(net, tn, &act_sample_ops); } static void __net_exit sample_exit_net(struct list_head *net_list) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 33aefa25b545..6120e56117ca 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -232,7 +232,7 @@ static __net_init int simp_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, simp_net_id); - return tc_action_net_init(tn, &act_simp_ops); + return tc_action_net_init(net, tn, &act_simp_ops); } static void __net_exit simp_exit_net(struct list_head *net_list) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 37dced00b63d..6a8d3337c577 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -336,7 +336,7 @@ static __net_init int skbedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); - return tc_action_net_init(tn, &act_skbedit_ops); + return tc_action_net_init(net, tn, &act_skbedit_ops); } static void __net_exit skbedit_exit_net(struct list_head *net_list) diff --git a/net/sched/act_skbmod.c 
b/net/sched/act_skbmod.c index 7da3518e18ef..888437f97ba6 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -287,7 +287,7 @@ static __net_init int skbmod_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); - return tc_action_net_init(tn, &act_skbmod_ops); + return tc_action_net_init(net, tn, &act_skbmod_ops); } static void __net_exit skbmod_exit_net(struct list_head *net_list) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 6d0debdc9b97..2f83a79f76aa 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -600,7 +600,7 @@ static __net_init int tunnel_key_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - return tc_action_net_init(tn, &act_tunnel_key_ops); + return tc_action_net_init(net, tn, &act_tunnel_key_ops); } static void __net_exit tunnel_key_exit_net(struct list_head *net_list) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index a3c9eea1ee8a..287a30bf8930 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -334,7 +334,7 @@ static __net_init int vlan_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, vlan_net_id); - return tc_action_net_init(tn, &act_vlan_ops); + return tc_action_net_init(net, tn, &act_vlan_ops); } static void __net_exit vlan_exit_net(struct list_head *net_list) -- cgit v1.2.1 From 340625e618e1b37a72a02f07aa7144ae0ab0b19e Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 27 Aug 2019 09:30:14 +1000 Subject: cifs: replace various strncpy with strscpy and similar Using strscpy is cleaner, and avoids some problems with handling maximum length strings. Linus noticed the original problem and Aurelien pointed out some additional problems. Fortunately most of this is SMB1 code (and in particular the ASCII string handling older, which is less common). 
Reported-by: Linus Torvalds Reviewed-by: Aurelien Aptel Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 1 + fs/cifs/cifssmb.c | 197 +++++++++++++++++----------------------------------- fs/cifs/connect.c | 7 +- fs/cifs/dir.c | 5 +- fs/cifs/misc.c | 22 ++++++ fs/cifs/sess.c | 26 ++++--- 6 files changed, 112 insertions(+), 146 deletions(-) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index e23234207fc2..592a6cea2b79 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -579,6 +579,7 @@ extern void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page, unsigned int *len, unsigned int *offset); void extract_unc_hostname(const char *unc, const char **h, size_t *len); +int copy_path_name(char *dst, const char *src); #ifdef CONFIG_CIFS_DFS_UPCALL static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index e2f95965065d..3907653e63c7 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -942,10 +942,8 @@ PsxDelete: PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB add path length overrun check */ - name_len = strnlen(fileName, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->FileName, fileName, name_len); + } else { + name_len = copy_path_name(pSMB->FileName, fileName); } params = 6 + name_len; @@ -1015,10 +1013,8 @@ DelFileRetry: remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve check for buffer overruns BB */ - name_len = strnlen(name, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->fileName, name, name_len); + } else { + name_len = copy_path_name(pSMB->fileName, name); } pSMB->SearchAttributes = cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM); @@ -1062,10 +1058,8 @@ RmDirRetry: remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve check for buffer overruns BB */ - name_len = strnlen(name, PATH_MAX); - 
name_len++; /* trailing null */ - strncpy(pSMB->DirName, name, name_len); + } else { + name_len = copy_path_name(pSMB->DirName, name); } pSMB->BufferFormat = 0x04; @@ -1107,10 +1101,8 @@ MkDirRetry: remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve check for buffer overruns BB */ - name_len = strnlen(name, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->DirName, name, name_len); + } else { + name_len = copy_path_name(pSMB->DirName, name); } pSMB->BufferFormat = 0x04; @@ -1157,10 +1149,8 @@ PsxCreat: PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(name, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->FileName, name, name_len); + } else { + name_len = copy_path_name(pSMB->FileName, name); } params = 6 + name_len; @@ -1324,11 +1314,9 @@ OldOpenRetry: fileName, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve check for buffer overruns BB */ + } else { count = 0; /* no pad */ - name_len = strnlen(fileName, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->fileName, fileName, name_len); + name_len = copy_path_name(pSMB->fileName, fileName); } if (*pOplock & REQ_OPLOCK) pSMB->OpenFlags = cpu_to_le16(REQ_OPLOCK); @@ -1442,11 +1430,8 @@ openRetry: /* BB improve check for buffer overruns BB */ /* no pad */ count = 0; - name_len = strnlen(path, PATH_MAX); - /* trailing null */ - name_len++; + name_len = copy_path_name(req->fileName, path); req->NameLength = cpu_to_le16(name_len); - strncpy(req->fileName, path, name_len); } if (*oplock & REQ_OPLOCK) @@ -2812,15 +2797,10 @@ renameRetry: remap); name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; name_len2 *= 2; /* convert to bytes */ - } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(from_name, PATH_MAX); - name_len++; /* trailing null */ - 
strncpy(pSMB->OldFileName, from_name, name_len); - name_len2 = strnlen(to_name, PATH_MAX); - name_len2++; /* trailing null */ + } else { + name_len = copy_path_name(pSMB->OldFileName, from_name); + name_len2 = copy_path_name(pSMB->OldFileName+name_len+1, to_name); pSMB->OldFileName[name_len] = 0x04; /* 2nd buffer format */ - strncpy(&pSMB->OldFileName[name_len + 1], to_name, name_len2); - name_len2++; /* trailing null */ name_len2++; /* signature byte */ } @@ -2962,15 +2942,10 @@ copyRetry: toName, PATH_MAX, nls_codepage, remap); name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; name_len2 *= 2; /* convert to bytes */ - } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(fromName, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->OldFileName, fromName, name_len); - name_len2 = strnlen(toName, PATH_MAX); - name_len2++; /* trailing null */ + } else { + name_len = copy_path_name(pSMB->OldFileName, fromName); pSMB->OldFileName[name_len] = 0x04; /* 2nd buffer format */ - strncpy(&pSMB->OldFileName[name_len + 1], toName, name_len2); - name_len2++; /* trailing null */ + name_len2 = copy_path_name(pSMB->OldFileName+name_len+1, toName); name_len2++; /* signature byte */ } @@ -3021,10 +2996,8 @@ createSymLinkRetry: name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(fromName, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->FileName, fromName, name_len); + } else { + name_len = copy_path_name(pSMB->FileName, fromName); } params = 6 + name_len; pSMB->MaxSetupCount = 0; @@ -3044,10 +3017,8 @@ createSymLinkRetry: PATH_MAX, nls_codepage, remap); name_len_target++; /* trailing null */ name_len_target *= 2; - } else { /* BB improve the check for buffer overruns BB */ - name_len_target = strnlen(toName, PATH_MAX); - name_len_target++; /* trailing null */ - strncpy(data_offset, toName, name_len_target); + } else { + name_len_target = 
copy_path_name(data_offset, toName); } pSMB->MaxParameterCount = cpu_to_le16(2); @@ -3109,10 +3080,8 @@ createHardLinkRetry: name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(toName, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->FileName, toName, name_len); + } else { + name_len = copy_path_name(pSMB->FileName, toName); } params = 6 + name_len; pSMB->MaxSetupCount = 0; @@ -3131,10 +3100,8 @@ createHardLinkRetry: PATH_MAX, nls_codepage, remap); name_len_target++; /* trailing null */ name_len_target *= 2; - } else { /* BB improve the check for buffer overruns BB */ - name_len_target = strnlen(fromName, PATH_MAX); - name_len_target++; /* trailing null */ - strncpy(data_offset, fromName, name_len_target); + } else { + name_len_target = copy_path_name(data_offset, fromName); } pSMB->MaxParameterCount = cpu_to_le16(2); @@ -3213,15 +3180,10 @@ winCreateHardLinkRetry: remap); name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; name_len2 *= 2; /* convert to bytes */ - } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(from_name, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->OldFileName, from_name, name_len); - name_len2 = strnlen(to_name, PATH_MAX); - name_len2++; /* trailing null */ + } else { + name_len = copy_path_name(pSMB->OldFileName, from_name); pSMB->OldFileName[name_len] = 0x04; /* 2nd buffer format */ - strncpy(&pSMB->OldFileName[name_len + 1], to_name, name_len2); - name_len2++; /* trailing null */ + name_len2 = copy_path_name(pSMB->OldFileName+name_len+1, to_name); name_len2++; /* signature byte */ } @@ -3271,10 +3233,8 @@ querySymLinkRetry: remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(searchName, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->FileName, searchName, name_len); + } else { + name_len = 
copy_path_name(pSMB->FileName, searchName); } params = 2 /* level */ + 4 /* rsrvd */ + name_len /* incl null */ ; @@ -3691,10 +3651,8 @@ queryAclRetry: name_len *= 2; pSMB->FileName[name_len] = 0; pSMB->FileName[name_len+1] = 0; - } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(searchName, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->FileName, searchName, name_len); + } else { + name_len = copy_path_name(pSMB->FileName, searchName); } params = 2 /* level */ + 4 /* rsrvd */ + name_len /* incl null */ ; @@ -3776,10 +3734,8 @@ setAclRetry: PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(fileName, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->FileName, fileName, name_len); + } else { + name_len = copy_path_name(pSMB->FileName, fileName); } params = 6 + name_len; pSMB->MaxParameterCount = cpu_to_le16(2); @@ -4184,9 +4140,7 @@ QInfRetry: name_len++; /* trailing null */ name_len *= 2; } else { - name_len = strnlen(search_name, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->FileName, search_name, name_len); + name_len = copy_path_name(pSMB->FileName, search_name); } pSMB->BufferFormat = 0x04; name_len++; /* account for buffer type byte */ @@ -4321,10 +4275,8 @@ QPathInfoRetry: PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(search_name, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->FileName, search_name, name_len); + } else { + name_len = copy_path_name(pSMB->FileName, search_name); } params = 2 /* level */ + 4 /* reserved */ + name_len /* includes NUL */; @@ -4490,10 +4442,8 @@ UnixQPathInfoRetry: PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ - name_len = 
strnlen(searchName, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->FileName, searchName, name_len); + } else { + name_len = copy_path_name(pSMB->FileName, searchName); } params = 2 /* level */ + 4 /* reserved */ + name_len /* includes NUL */; @@ -4593,17 +4543,16 @@ findFirstRetry: pSMB->FileName[name_len+1] = 0; name_len += 2; } - } else { /* BB add check for overrun of SMB buf BB */ - name_len = strnlen(searchName, PATH_MAX); -/* BB fix here and in unicode clause above ie - if (name_len > buffersize-header) - free buffer exit; BB */ - strncpy(pSMB->FileName, searchName, name_len); + } else { + name_len = copy_path_name(pSMB->FileName, searchName); if (msearch) { - pSMB->FileName[name_len] = CIFS_DIR_SEP(cifs_sb); - pSMB->FileName[name_len+1] = '*'; - pSMB->FileName[name_len+2] = 0; - name_len += 3; + if (WARN_ON_ONCE(name_len > PATH_MAX-2)) + name_len = PATH_MAX-2; + /* overwrite nul byte */ + pSMB->FileName[name_len-1] = CIFS_DIR_SEP(cifs_sb); + pSMB->FileName[name_len] = '*'; + pSMB->FileName[name_len+1] = 0; + name_len += 2; } } @@ -4898,10 +4847,8 @@ GetInodeNumberRetry: remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(search_name, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->FileName, search_name, name_len); + } else { + name_len = copy_path_name(pSMB->FileName, search_name); } params = 2 /* level */ + 4 /* rsrvd */ + name_len /* incl null */ ; @@ -5008,9 +4955,7 @@ getDFSRetry: name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(search_name, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->RequestFileName, search_name, name_len); + name_len = copy_path_name(pSMB->RequestFileName, search_name); } if (ses->server->sign) @@ -5663,10 +5608,8 @@ SetEOFRetry: PATH_MAX, cifs_sb->local_nls, remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB 
improve the check for buffer overruns BB */ - name_len = strnlen(file_name, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->FileName, file_name, name_len); + } else { + name_len = copy_path_name(pSMB->FileName, file_name); } params = 6 + name_len; data_count = sizeof(struct file_end_of_file_info); @@ -5959,10 +5902,8 @@ SetTimesRetry: PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(fileName, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->FileName, fileName, name_len); + } else { + name_len = copy_path_name(pSMB->FileName, fileName); } params = 6 + name_len; @@ -6040,10 +5981,8 @@ SetAttrLgcyRetry: PATH_MAX, nls_codepage); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(fileName, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->fileName, fileName, name_len); + } else { + name_len = copy_path_name(pSMB->fileName, fileName); } pSMB->attr = cpu_to_le16(dos_attrs); pSMB->BufferFormat = 0x04; @@ -6203,10 +6142,8 @@ setPermsRetry: PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(file_name, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->FileName, file_name, name_len); + } else { + name_len = copy_path_name(pSMB->FileName, file_name); } params = 6 + name_len; @@ -6298,10 +6235,8 @@ QAllEAsRetry: PATH_MAX, nls_codepage, remap); list_len++; /* trailing null */ list_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ - list_len = strnlen(searchName, PATH_MAX); - list_len++; /* trailing null */ - strncpy(pSMB->FileName, searchName, list_len); + } else { + list_len = copy_path_name(pSMB->FileName, searchName); } params = 2 /* level */ + 4 /* reserved */ + list_len /* includes NUL */; @@ -6480,10 +6415,8 @@ 
SetEARetry: PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; - } else { /* BB improve the check for buffer overruns BB */ - name_len = strnlen(fileName, PATH_MAX); - name_len++; /* trailing null */ - strncpy(pSMB->FileName, fileName, name_len); + } else { + name_len = copy_path_name(pSMB->FileName, fileName); } params = 6 + name_len; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index ddefddeffd06..5299effa6f7d 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -4231,16 +4231,19 @@ build_unc_path_to_root(const struct smb_vol *vol, strlen(vol->prepath) + 1 : 0; unsigned int unc_len = strnlen(vol->UNC, MAX_TREE_SIZE + 1); + if (unc_len > MAX_TREE_SIZE) + return ERR_PTR(-EINVAL); + full_path = kmalloc(unc_len + pplen + 1, GFP_KERNEL); if (full_path == NULL) return ERR_PTR(-ENOMEM); - strncpy(full_path, vol->UNC, unc_len); + memcpy(full_path, vol->UNC, unc_len); pos = full_path + unc_len; if (pplen) { *pos = CIFS_DIR_SEP(cifs_sb); - strncpy(pos + 1, vol->prepath, pplen); + memcpy(pos + 1, vol->prepath, pplen); pos += pplen; } diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index f26a48dd2e39..be424e81e3ad 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -69,11 +69,10 @@ cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, return full_path; if (dfsplen) - strncpy(full_path, tcon->treeName, dfsplen); + memcpy(full_path, tcon->treeName, dfsplen); full_path[dfsplen] = CIFS_DIR_SEP(cifs_sb); - strncpy(full_path + dfsplen + 1, vol->prepath, pplen); + memcpy(full_path + dfsplen + 1, vol->prepath, pplen); convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb)); - full_path[dfsplen + pplen] = 0; /* add trailing null */ return full_path; } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index f383877a6511..5ad83bdb9bea 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -1011,3 +1011,25 @@ void extract_unc_hostname(const char *unc, const char **h, size_t *len) *h = unc; *len = end - unc; } + +/** + * copy_path_name - copy src 
path to dst, possibly truncating + * + * returns number of bytes written (including trailing nul) + */ +int copy_path_name(char *dst, const char *src) +{ + int name_len; + + /* + * PATH_MAX includes nul, so if strlen(src) >= PATH_MAX it + * will truncate and strlen(dst) will be PATH_MAX-1 + */ + name_len = strscpy(dst, src, PATH_MAX); + if (WARN_ON_ONCE(name_len < 0)) + name_len = PATH_MAX-1; + + /* we count the trailing nul */ + name_len++; + return name_len; +} diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index dcd49ad60c83..4c764ff7edd2 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -159,13 +159,16 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, const struct nls_table *nls_cp) { char *bcc_ptr = *pbcc_area; + int len; /* copy user */ /* BB what about null user mounts - check that we do this BB */ /* copy user */ if (ses->user_name != NULL) { - strncpy(bcc_ptr, ses->user_name, CIFS_MAX_USERNAME_LEN); - bcc_ptr += strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN); + len = strscpy(bcc_ptr, ses->user_name, CIFS_MAX_USERNAME_LEN); + if (WARN_ON_ONCE(len < 0)) + len = CIFS_MAX_USERNAME_LEN - 1; + bcc_ptr += len; } /* else null user mount */ *bcc_ptr = 0; @@ -173,8 +176,10 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, /* copy domain */ if (ses->domainName != NULL) { - strncpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN); - bcc_ptr += strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN); + len = strscpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN); + if (WARN_ON_ONCE(len < 0)) + len = CIFS_MAX_DOMAINNAME_LEN - 1; + bcc_ptr += len; } /* else we will send a null domain name so the server will default to its own domain */ *bcc_ptr = 0; @@ -242,9 +247,10 @@ static void decode_ascii_ssetup(char **pbcc_area, __u16 bleft, kfree(ses->serverOS); - ses->serverOS = kzalloc(len + 1, GFP_KERNEL); + ses->serverOS = kmalloc(len + 1, GFP_KERNEL); if (ses->serverOS) { - strncpy(ses->serverOS, bcc_ptr, len); + 
memcpy(ses->serverOS, bcc_ptr, len); + ses->serverOS[len] = 0; if (strncmp(ses->serverOS, "OS/2", 4) == 0) cifs_dbg(FYI, "OS/2 server\n"); } @@ -258,9 +264,11 @@ static void decode_ascii_ssetup(char **pbcc_area, __u16 bleft, kfree(ses->serverNOS); - ses->serverNOS = kzalloc(len + 1, GFP_KERNEL); - if (ses->serverNOS) - strncpy(ses->serverNOS, bcc_ptr, len); + ses->serverNOS = kmalloc(len + 1, GFP_KERNEL); + if (ses->serverNOS) { + memcpy(ses->serverNOS, bcc_ptr, len); + ses->serverNOS[len] = 0; + } bcc_ptr += len + 1; bleft -= len + 1; -- cgit v1.2.1 From 36e337744c0d9ea23a64a8b62bddec6173e93975 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 27 Aug 2019 17:29:56 -0500 Subject: cifs: update internal module number To 2.22 Signed-off-by: Steve French --- fs/cifs/cifsfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 4b21a90015a9..99caf77df4a2 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -152,5 +152,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.21" +#define CIFS_VERSION "2.22" #endif /* _CIFSFS_H */ -- cgit v1.2.1 From 94acaeb50ced653bfe2c4d8037c70b107af14124 Mon Sep 17 00:00:00 2001 From: Marco Hartmann Date: Wed, 21 Aug 2019 11:00:46 +0000 Subject: Add genphy_c45_config_aneg() function to phy-c45.c Commit 34786005eca3 ("net: phy: prevent PHYs w/o Clause 22 regs from calling genphy_config_aneg") introduced a check that aborts phy_config_aneg() if the phy is a C45 phy. This causes phy_state_machine() to call phy_error() so that the phy ends up in PHY_HALTED state. Instead of returning -EOPNOTSUPP, call genphy_c45_config_aneg() (analogous to the C22 case) so that the state machine can run correctly. 
genphy_c45_config_aneg() closely resembles mv3310_config_aneg() in drivers/net/phy/marvell10g.c, excluding vendor specific configurations for 1000BaseT. Fixes: 22b56e827093 ("net: phy: replace genphy_10g_driver with genphy_c45_driver") Signed-off-by: Marco Hartmann Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy-c45.c | 26 ++++++++++++++++++++++++++ drivers/net/phy/phy.c | 2 +- include/linux/phy.h | 1 + 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 58bb25e4af10..7935593debb1 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -523,6 +523,32 @@ int genphy_c45_read_status(struct phy_device *phydev) } EXPORT_SYMBOL_GPL(genphy_c45_read_status); +/** + * genphy_c45_config_aneg - restart auto-negotiation or forced setup + * @phydev: target phy_device struct + * + * Description: If auto-negotiation is enabled, we configure the + * advertising, and then restart auto-negotiation. If it is not + * enabled, then we force a configuration. 
+ */ +int genphy_c45_config_aneg(struct phy_device *phydev) +{ + bool changed = false; + int ret; + + if (phydev->autoneg == AUTONEG_DISABLE) + return genphy_c45_pma_setup_forced(phydev); + + ret = genphy_c45_an_config_aneg(phydev); + if (ret < 0) + return ret; + if (ret > 0) + changed = true; + + return genphy_c45_check_and_restart_aneg(phydev, changed); +} +EXPORT_SYMBOL_GPL(genphy_c45_config_aneg); + /* The gen10g_* functions are the old Clause 45 stub */ int gen10g_config_aneg(struct phy_device *phydev) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index ef7aa738e0dc..6b0f89369b46 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -507,7 +507,7 @@ static int phy_config_aneg(struct phy_device *phydev) * allowed to call genphy_config_aneg() */ if (phydev->is_c45 && !(phydev->c45_ids.devices_in_package & BIT(0))) - return -EOPNOTSUPP; + return genphy_c45_config_aneg(phydev); return genphy_config_aneg(phydev); } diff --git a/include/linux/phy.h b/include/linux/phy.h index 462b90b73f93..2fb9c8ffaf10 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1107,6 +1107,7 @@ int genphy_c45_an_disable_aneg(struct phy_device *phydev); int genphy_c45_read_mdix(struct phy_device *phydev); int genphy_c45_pma_read_abilities(struct phy_device *phydev); int genphy_c45_read_status(struct phy_device *phydev); +int genphy_c45_config_aneg(struct phy_device *phydev); /* The gen10g_* functions are the old Clause 45 stub */ int gen10g_config_aneg(struct phy_device *phydev); -- cgit v1.2.1 From bcccb0a535bb99616e4b992568371efab1ab14e8 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 25 Aug 2019 21:32:12 +0300 Subject: net: dsa: tag_8021q: Future-proof the reserved fields in the custom VID After witnessing the discussion in https://lkml.org/lkml/2019/8/14/151 w.r.t. 
ioctl extensibility, it became clear that the 3 RSV bits inside the DSA 802.1Q tag might suffer the same fate and become useless for further extension. So clearly specify that the reserved bits should currently be transmitted as zero and ignored on receive. The DSA tagger already does this (and always has), and is the only known user so far (no Wireshark dissection plugin, etc). So there should be no incompatibility to speak of. Fixes: 0471dd429cea ("net: dsa: tag_8021q: Create a stable binary format") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/tag_8021q.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 6ebbd799c4eb..67a1bc635a7b 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -28,6 +28,7 @@ * * RSV - VID[9]: * To be used for further expansion of SWITCH_ID or for other purposes. + * Must be transmitted as zero and ignored on receive. * * SWITCH_ID - VID[8:6]: * Index of switch within DSA tree. Must be between 0 and @@ -35,6 +36,7 @@ * * RSV - VID[5:4]: * To be used for further expansion of PORT or for other purposes. + * Must be transmitted as zero and ignored on receive. * * PORT - VID[3:0]: * Index of switch port. Must be between 0 and DSA_MAX_PORTS - 1. -- cgit v1.2.1 From 2c1644cf6d46a8267d79ed95cb9b563839346562 Mon Sep 17 00:00:00 2001 From: Feng Sun Date: Mon, 26 Aug 2019 14:46:04 +0800 Subject: net: fix skb use after free in netpoll After commit baeababb5b85d5c4e6c917efe2a1504179438d3b ("tun: return NET_XMIT_DROP for dropped packets"), when tun_net_xmit drops packets, it will free the skb and return NET_XMIT_DROP, and netpoll_send_skb_on_dev will run into the following use-after-free cases: 1. retry netpoll_start_xmit with freed skb; 2. queue freed skb in npinfo->txq. queue_process will also run into a use-after-free case.
hit netpoll_send_skb_on_dev first case with following kernel log: [ 117.864773] kernel BUG at mm/slub.c:306! [ 117.864773] invalid opcode: 0000 [#1] SMP PTI [ 117.864774] CPU: 3 PID: 2627 Comm: loop_printmsg Kdump: loaded Tainted: P OE 5.3.0-050300rc5-generic #201908182231 [ 117.864775] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 117.864775] RIP: 0010:kmem_cache_free+0x28d/0x2b0 [ 117.864781] Call Trace: [ 117.864781] ? tun_net_xmit+0x21c/0x460 [ 117.864781] kfree_skbmem+0x4e/0x60 [ 117.864782] kfree_skb+0x3a/0xa0 [ 117.864782] tun_net_xmit+0x21c/0x460 [ 117.864782] netpoll_start_xmit+0x11d/0x1b0 [ 117.864788] netpoll_send_skb_on_dev+0x1b8/0x200 [ 117.864789] __br_forward+0x1b9/0x1e0 [bridge] [ 117.864789] ? skb_clone+0x53/0xd0 [ 117.864790] ? __skb_clone+0x2e/0x120 [ 117.864790] deliver_clone+0x37/0x50 [bridge] [ 117.864790] maybe_deliver+0x89/0xc0 [bridge] [ 117.864791] br_flood+0x6c/0x130 [bridge] [ 117.864791] br_dev_xmit+0x315/0x3c0 [bridge] [ 117.864792] netpoll_start_xmit+0x11d/0x1b0 [ 117.864792] netpoll_send_skb_on_dev+0x1b8/0x200 [ 117.864792] netpoll_send_udp+0x2c6/0x3e8 [ 117.864793] write_msg+0xd9/0xf0 [netconsole] [ 117.864793] console_unlock+0x386/0x4e0 [ 117.864793] vprintk_emit+0x17e/0x280 [ 117.864794] vprintk_default+0x29/0x50 [ 117.864794] vprintk_func+0x4c/0xbc [ 117.864794] printk+0x58/0x6f [ 117.864795] loop_fun+0x24/0x41 [printmsg_loop] [ 117.864795] kthread+0x104/0x140 [ 117.864795] ? 0xffffffffc05b1000 [ 117.864796] ? kthread_park+0x80/0x80 [ 117.864796] ret_from_fork+0x35/0x40 Signed-off-by: Feng Sun Signed-off-by: Xiaojun Zhao Signed-off-by: David S. 
Miller --- net/core/netpoll.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 2cf27da1baeb..849380a622ef 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -122,7 +122,7 @@ static void queue_process(struct work_struct *work) txq = netdev_get_tx_queue(dev, q_index); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (netif_xmit_frozen_or_stopped(txq) || - netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) { + !dev_xmit_complete(netpoll_start_xmit(skb, dev, txq))) { skb_queue_head(&npinfo->txq, skb); HARD_TX_UNLOCK(dev, txq); local_irq_restore(flags); @@ -335,7 +335,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, HARD_TX_UNLOCK(dev, txq); - if (status == NETDEV_TX_OK) + if (dev_xmit_complete(status)) break; } @@ -352,7 +352,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, } - if (status != NETDEV_TX_OK) { + if (!dev_xmit_complete(status)) { skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } -- cgit v1.2.1 From 7d0a06586b2686ba80c4a2da5f91cb10ffbea736 Mon Sep 17 00:00:00 2001 From: Ka-Cheong Poon Date: Mon, 26 Aug 2019 02:39:12 -0700 Subject: net/rds: Fix info leak in rds6_inc_info_copy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rds6_inc_info_copy() function has a couple struct members which are leaking stack information. The ->tos field should hold actual information and the ->flags field needs to be zeroed out. Fixes: 3eb450367d08 ("rds: add type of service(tos) infrastructure") Fixes: b7ff8b1036f0 ("rds: Extend RDS API for IPv6 support") Reported-by: 黄ID蝴蝶 Signed-off-by: Dan Carpenter Signed-off-by: Ka-Cheong Poon Acked-by: Santosh Shilimkar Signed-off-by: David S. 
Miller --- net/rds/recv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/rds/recv.c b/net/rds/recv.c index 853de4876088..a42ba7fa06d5 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -811,6 +811,7 @@ void rds6_inc_info_copy(struct rds_incoming *inc, minfo6.seq = be64_to_cpu(inc->i_hdr.h_sequence); minfo6.len = be32_to_cpu(inc->i_hdr.h_len); + minfo6.tos = inc->i_conn->c_tos; if (flip) { minfo6.laddr = *daddr; @@ -824,6 +825,8 @@ void rds6_inc_info_copy(struct rds_incoming *inc, minfo6.fport = inc->i_hdr.h_dport; } + minfo6.flags = 0; + rds_info_copy(iter, &minfo6, sizeof(minfo6)); } #endif -- cgit v1.2.1 From fdfc5c8594c24c5df883583ebd286321a80e0a67 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 26 Aug 2019 09:19:15 -0700 Subject: tcp: remove empty skb from write queue in error cases Vladimir Rutsky reported stuck TCP sessions after memory pressure events. Edge Trigger epoll() user would never receive an EPOLLOUT notification allowing them to retry a sendmsg(). Jason tested the case of sk_stream_alloc_skb() returning NULL, but there are other paths that could lead both sendmsg() and sendpage() to return -1 (EAGAIN), with an empty skb queued on the write queue. This patch makes sure we remove this empty skb so that Jason code can detect that the queue is empty, and call sk->sk_write_space(sk) accordingly. Fixes: ce5ec440994b ("tcp: ensure epoll edge trigger wakeup when write queue is empty") Signed-off-by: Eric Dumazet Cc: Jason Baron Reported-by: Vladimir Rutsky Cc: Soheil Hassas Yeganeh Cc: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 77b485d60b9d..61082065b26a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -935,6 +935,22 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) return mss_now; } +/* In some cases, both sendpage() and sendmsg() could have added + * an skb to the write queue, but failed adding payload on it. + * We need to remove it to consume less memory, but more + * importantly be able to generate EPOLLOUT for Edge Trigger epoll() + * users. + */ +static void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb) +{ + if (skb && !skb->len) { + tcp_unlink_write_queue(skb, sk); + if (tcp_write_queue_empty(sk)) + tcp_chrono_stop(sk, TCP_CHRONO_BUSY); + sk_wmem_free_skb(sk, skb); + } +} + ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, size_t size, int flags) { @@ -1064,6 +1080,7 @@ out: return copied; do_error: + tcp_remove_empty_skb(sk, tcp_write_queue_tail(sk)); if (copied) goto out; out_err: @@ -1388,18 +1405,11 @@ out_nopush: sock_zerocopy_put(uarg); return copied + copied_syn; +do_error: + skb = tcp_write_queue_tail(sk); do_fault: - if (!skb->len) { - tcp_unlink_write_queue(skb, sk); - /* It is the one place in all of TCP, except connection - * reset, where we can be unlinking the send_head. - */ - if (tcp_write_queue_empty(sk)) - tcp_chrono_stop(sk, TCP_CHRONO_BUSY); - sk_wmem_free_skb(sk, skb); - } + tcp_remove_empty_skb(sk, skb); -do_error: if (copied + copied_syn) goto out; out_err: -- cgit v1.2.1 From 82e40f558de566fdee214bec68096bbd5e64a6a4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 28 Aug 2019 11:10:16 +0100 Subject: KVM: arm/arm64: vgic-v2: Handle SGI bits in GICD_I{S,C}PENDR0 as WI A guest is not allowed to inject a SGI (or clear its pending state) by writing to GICD_ISPENDR0 (resp. 
GICD_ICPENDR0), as these bits are defined as WI (as per ARM IHI 0048B 4.3.7 and 4.3.8). Make sure we correctly emulate the architecture. Fixes: 96b298000db4 ("KVM: arm/arm64: vgic-new: Add PENDING registers handlers") Cc: stable@vger.kernel.org # 4.7+ Reported-by: Andre Przywara Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- virt/kvm/arm/vgic/vgic-mmio.c | 18 ++++++++++++++++++ virt/kvm/arm/vgic/vgic-v2.c | 5 ++++- virt/kvm/arm/vgic/vgic-v3.c | 5 ++++- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 3ba7278fb533..b249220025bc 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -195,6 +195,12 @@ static void vgic_hw_irq_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq, vgic_irq_set_phys_active(irq, true); } +static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq) +{ + return (vgic_irq_is_sgi(irq->intid) && + vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2); +} + void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) @@ -207,6 +213,12 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + /* GICD_ISPENDR0 SGI bits are WI */ + if (is_vgic_v2_sgi(vcpu, irq)) { + vgic_put_irq(vcpu->kvm, irq); + continue; + } + raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->hw) vgic_hw_irq_spending(vcpu, irq, is_uaccess); @@ -254,6 +266,12 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + /* GICD_ICPENDR0 SGI bits are WI */ + if (is_vgic_v2_sgi(vcpu, irq)) { + vgic_put_irq(vcpu->kvm, irq); + continue; + } + raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->hw) diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 6dd5ad706c92..1059ce2ebfdf 
100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -184,7 +184,10 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) if (vgic_irq_is_sgi(irq->intid)) { u32 src = ffs(irq->source); - BUG_ON(!src); + if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n", + irq->intid)) + return; + val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; irq->source &= ~(1 << (src - 1)); if (irq->source) { diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index c2c9ce009f63..f7a4219f4617 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -167,7 +167,10 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) model == KVM_DEV_TYPE_ARM_VGIC_V2) { u32 src = ffs(irq->source); - BUG_ON(!src); + if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n", + irq->intid)) + return; + val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; irq->source &= ~(1 << (src - 1)); if (irq->source) { -- cgit v1.2.1 From e8c99200b4d117c340c392ebd5e62d85dfeed027 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 24 Jul 2019 17:43:06 +0800 Subject: libceph: don't call crypto_free_sync_skcipher() on a NULL tfm In set_secret(), key->tfm is assigned to NULL on line 55, and then ceph_crypto_key_destroy(key) is executed. ceph_crypto_key_destroy(key) crypto_free_sync_skcipher(key->tfm) crypto_free_skcipher(&tfm->base); This happens to work because crypto_sync_skcipher is a trivial wrapper around crypto_skcipher: &tfm->base is still 0 and crypto_free_skcipher() handles that. Let's not rely on the layout of crypto_sync_skcipher. This bug is found by a static analysis tool STCheck written by us. Fixes: 69d6302b65a8 ("libceph: Remove VLA usage of skcipher"). 
Signed-off-by: Jia-Ju Bai Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- net/ceph/crypto.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 5d6724cee38f..4f75df40fb12 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -136,8 +136,10 @@ void ceph_crypto_key_destroy(struct ceph_crypto_key *key) if (key) { kfree(key->key); key->key = NULL; - crypto_free_sync_skcipher(key->tfm); - key->tfm = NULL; + if (key->tfm) { + crypto_free_sync_skcipher(key->tfm); + key->tfm = NULL; + } } } -- cgit v1.2.1 From d435c9a7b85be1e820668d2f3718c2d9f24d5548 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 27 Aug 2019 16:45:10 +0200 Subject: rbd: restore zeroing past the overlap when reading from parent The parent image is read only up to the overlap point, the rest of the buffer should be zeroed. This snuck in because as it turns out the overlap test case has not been triggering this code path for a while now. Fixes: a9b67e69949d ("rbd: replace obj_req->tried_parent with obj_req->read_state") Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 3327192bb71f..c8fb886aebd4 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3038,6 +3038,17 @@ again: } return true; case RBD_OBJ_READ_PARENT: + /* + * The parent image is read only up to the overlap -- zero-fill + * from the overlap to the end of the request. 
+ */ + if (!*result) { + u32 obj_overlap = rbd_obj_img_extents_bytes(obj_req); + + if (obj_overlap < obj_req->ex.oe_len) + rbd_obj_zero_range(obj_req, obj_overlap, + obj_req->ex.oe_len - obj_overlap); + } return true; default: BUG(); -- cgit v1.2.1 From 531a64e4c35bb9844b0cf813a6c9a87e00be05ff Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Wed, 28 Aug 2019 15:03:55 +0200 Subject: RDMA/siw: Fix IPv6 addr_list locking Walking the address list of an inet6_dev requires appropriate locking. Since the called function siw_listen_address() may sleep, we have to use rtnl_lock() instead of read_lock_bh(). Also introduces sanity checks if we got a device from in_dev_get() or in6_dev_get(). Reported-by: Bart Van Assche Fixes: 6c52fdc244b5 ("rdma/siw: connection management") Signed-off-by: Bernard Metzler Link: https://lore.kernel.org/r/20190828130355.22830-1-bmt@zurich.ibm.com Signed-off-by: Doug Ledford --- drivers/infiniband/sw/siw/siw_cm.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 1db5ad3d9580..8c1931a57f4a 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1962,6 +1962,10 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) struct sockaddr_in s_laddr, *s_raddr; const struct in_ifaddr *ifa; + if (!in_dev) { + rv = -ENODEV; + goto out; + } memcpy(&s_laddr, &id->local_addr, sizeof(s_laddr)); s_raddr = (struct sockaddr_in *)&id->remote_addr; @@ -1991,22 +1995,27 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) struct sockaddr_in6 *s_laddr = &to_sockaddr_in6(id->local_addr), *s_raddr = &to_sockaddr_in6(id->remote_addr); + if (!in6_dev) { + rv = -ENODEV; + goto out; + } siw_dbg(id->device, "laddr %pI6:%d, raddr %pI6:%d\n", &s_laddr->sin6_addr, ntohs(s_laddr->sin6_port), &s_raddr->sin6_addr, ntohs(s_raddr->sin6_port)); - read_lock_bh(&in6_dev->lock); + rtnl_lock(); 
list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { - struct sockaddr_in6 bind_addr; - + if (ifp->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED)) + continue; if (ipv6_addr_any(&s_laddr->sin6_addr) || ipv6_addr_equal(&s_laddr->sin6_addr, &ifp->addr)) { - bind_addr.sin6_family = AF_INET6; - bind_addr.sin6_port = s_laddr->sin6_port; - bind_addr.sin6_flowinfo = 0; - bind_addr.sin6_addr = ifp->addr; - bind_addr.sin6_scope_id = dev->ifindex; + struct sockaddr_in6 bind_addr = { + .sin6_family = AF_INET6, + .sin6_port = s_laddr->sin6_port, + .sin6_flowinfo = 0, + .sin6_addr = ifp->addr, + .sin6_scope_id = dev->ifindex }; rv = siw_listen_address(id, backlog, (struct sockaddr *)&bind_addr, @@ -2015,12 +2024,12 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) listeners++; } } - read_unlock_bh(&in6_dev->lock); - + rtnl_unlock(); in6_dev_put(in6_dev); } else { - return -EAFNOSUPPORT; + rv = -EAFNOSUPPORT; } +out: if (listeners) rv = 0; else if (!rv) -- cgit v1.2.1 From 42e0e95474fc6076b5cd68cab8fa0340a1797a72 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 28 Aug 2019 10:56:48 +0200 Subject: x86/build: Add -Wnoaddress-of-packed-member to REALMODE_CFLAGS, to silence GCC9 build warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One of the very few warnings I have in the current build comes from arch/x86/boot/edd.c, where I get the following with a gcc9 build: arch/x86/boot/edd.c: In function ‘query_edd’: arch/x86/boot/edd.c:148:11: warning: taking address of packed member of ‘struct boot_params’ may result in an unaligned pointer value [-Waddress-of-packed-member] 148 | mbrptr = boot_params.edd_mbr_sig_buffer; | ^~~~~~~~~~~ This warning triggers because we throw away all the CFLAGS and then make a new set for REALMODE_CFLAGS, so the -Wno-address-of-packed-member we added in the following commit is not present: 6f303d60534c ("gcc-9: silence 'address-of-packed-member' warning") The simplest solution for now is to 
adjust the warning for this version of CFLAGS as well, but it would definitely make sense to examine whether REALMODE_CFLAGS could be derived from CFLAGS, so that it picks up changes in the compiler flags environment automatically. Signed-off-by: Linus Torvalds Acked-by: Borislav Petkov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 56e748a7679f..94df0868804b 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -38,6 +38,7 @@ REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -DDISABLE_BRANCH_PROFILING \ REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -ffreestanding) REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -fno-stack-protector) +REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -Wno-address-of-packed-member) REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align4)) export REALMODE_CFLAGS -- cgit v1.2.1 From 6978bce054247e4cfccdf689ce263e076499f905 Mon Sep 17 00:00:00 2001 From: Ayan Kumar Halder Date: Wed, 28 Aug 2019 15:00:19 +0000 Subject: drm/komeda: Reordered the komeda's de-init functions The de-init routine should be doing the following in order:- 1. Unregister the drm device 2. Shut down the crtcs - failing to do this might cause a connector leakage See the 'commit 109c4d18e574 ("drm/arm/malidp: Ensure that the crtcs are shutdown before removing any encoder/connector")' 3. Disable the interrupts 4. Unbind the components 5. Free up DRM mode_config info Changes from v1:- 1. Re-ordered the header files inclusion 2. Rebased on top of the latest drm-misc-fixes Signed-off-by:
Ayan Kumar Halder Reviewed-by: Mihail Atanassov Reviewed-by: James Qian Wang (Arm Technology China) Link: https://patchwork.freedesktop.org/patch/327606/ --- drivers/gpu/drm/arm/display/komeda/komeda_kms.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c index 1f0e3f4e8d74..69d9e26c60c8 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c @@ -14,8 +14,8 @@ #include #include #include -#include #include +#include #include "komeda_dev.h" #include "komeda_framebuffer.h" @@ -306,11 +306,11 @@ struct komeda_kms_dev *komeda_kms_attach(struct komeda_dev *mdev) komeda_kms_irq_handler, IRQF_SHARED, drm->driver->name, drm); if (err) - goto cleanup_mode_config; + goto free_component_binding; err = mdev->funcs->enable_irq(mdev); if (err) - goto cleanup_mode_config; + goto free_component_binding; drm->irq_enabled = true; @@ -318,15 +318,21 @@ struct komeda_kms_dev *komeda_kms_attach(struct komeda_dev *mdev) err = drm_dev_register(drm, 0); if (err) - goto cleanup_mode_config; + goto free_interrupts; return kms; -cleanup_mode_config: +free_interrupts: drm_kms_helper_poll_fini(drm); drm->irq_enabled = false; + mdev->funcs->disable_irq(mdev); +free_component_binding: + component_unbind_all(mdev->dev, drm); +cleanup_mode_config: drm_mode_config_cleanup(drm); komeda_kms_cleanup_private_objs(kms); + drm->dev_private = NULL; + drm_dev_put(drm); free_kms: kfree(kms); return ERR_PTR(err); @@ -337,13 +343,14 @@ void komeda_kms_detach(struct komeda_kms_dev *kms) struct drm_device *drm = &kms->base; struct komeda_dev *mdev = drm->dev_private; - drm->irq_enabled = false; - mdev->funcs->disable_irq(mdev); drm_dev_unregister(drm); drm_kms_helper_poll_fini(drm); + drm_atomic_helper_shutdown(drm); + drm->irq_enabled = false; + mdev->funcs->disable_irq(mdev); component_unbind_all(mdev->dev, drm); - 
komeda_kms_cleanup_private_objs(kms); drm_mode_config_cleanup(drm); + komeda_kms_cleanup_private_objs(kms); drm->dev_private = NULL; drm_dev_put(drm); } -- cgit v1.2.1 From a642a0b33343add9b771ce88a6c5ff6cf92f22d2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 27 Aug 2019 21:53:10 +0200 Subject: MAINTAINERS: add entry for LICENSES and SPDX stuff Thomas and I seem to have become the "unofficial" maintainers for these files and questions about SPDX things. So let's make it official. Reported-by: "Darrick J. Wong" Grumpily-acked-by: Thomas Gleixner Acked-by: Darrick J. Wong Link: https://lore.kernel.org/r/20190827195310.GA30618@kroah.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 43604d6ab96c..f13274f32101 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9233,6 +9233,18 @@ F: include/linux/nd.h F: include/linux/libnvdimm.h F: include/uapi/linux/ndctl.h +LICENSES and SPDX stuff +M: Thomas Gleixner +M: Greg Kroah-Hartman +L: linux-spdx@vger.kernel.org +S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/spdx.git +F: COPYING +F: Documentation/process/license-rules.rst +F: LICENSES/ +F: scripts/spdxcheck-test.sh +F: scripts/spdxcheck.py + LIGHTNVM PLATFORM SUPPORT M: Matias Bjorling W: http://github/OpenChannelSSD -- cgit v1.2.1 From 961b6ffe0e2c403b09a8efe4a2e986b3c415391a Mon Sep 17 00:00:00 2001 From: Ding Xiang Date: Wed, 21 Aug 2019 10:49:52 +0300 Subject: stm class: Fix a double free of stm_source_device In the error path of stm_source_register_device(), the kfree is unnecessary, as the put_device() before it ends up calling stm_source_device_release() to free stm_source_device, leading to a double free at the outer kfree() call. Remove it. 
Signed-off-by: Ding Xiang Signed-off-by: Alexander Shishkin Fixes: 7bd1d4093c2fa ("stm class: Introduce an abstraction for System Trace Module devices") Link: https://lore.kernel.org/linux-arm-kernel/1563354988-23826-1-git-send-email-dingxiang@cmss.chinamobile.com/ Cc: stable@vger.kernel.org # v4.4+ Link: https://lore.kernel.org/r/20190821074955.3925-2-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/stm/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c index e55b902560de..181e7ff1ec4f 100644 --- a/drivers/hwtracing/stm/core.c +++ b/drivers/hwtracing/stm/core.c @@ -1276,7 +1276,6 @@ int stm_source_register_device(struct device *parent, err: put_device(&src->dev); - kfree(src); return err; } -- cgit v1.2.1 From 164eb56e3b64f3a816238d410c9efec7567a82ef Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 21 Aug 2019 10:49:54 +0300 Subject: intel_th: pci: Add support for another Lewisburg PCH Add support for the Trace Hub in another Lewisburg PCH. 
Signed-off-by: Alexander Shishkin Cc: stable@vger.kernel.org # v4.14+ Link: https://lore.kernel.org/r/20190821074955.3925-4-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index c0378c3de9a4..5c4e4fbec936 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -164,6 +164,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa1a6), .driver_data = (kernel_ulong_t)0, }, + { + /* Lewisburg PCH */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa226), + .driver_data = (kernel_ulong_t)0, + }, { /* Gemini Lake */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x318e), -- cgit v1.2.1 From 9c78255fdde45c6b9a1ee30f652f7b34c727f5c7 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Wed, 21 Aug 2019 10:49:55 +0300 Subject: intel_th: pci: Add Tiger Lake support This adds support for the Trace Hub in Tiger Lake PCH. 
Signed-off-by: Alexander Shishkin Cc: stable@vger.kernel.org # v4.14+ Link: https://lore.kernel.org/r/20190821074955.3925-5-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 5c4e4fbec936..91dfeba62485 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -204,6 +204,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x45c5), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Tiger Lake PCH */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa0a6), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { 0 }, }; -- cgit v1.2.1 From 587f17407741a5be07f8a2d1809ec946c8120962 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Mon, 19 Aug 2019 13:32:10 +0300 Subject: mei: me: add Tiger Lake point LP device ID Add Tiger Lake Point device ID for TGP LP. 
Signed-off-by: Tomas Winkler Cc: stable Link: https://lore.kernel.org/r/20190819103210.32748-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 2 ++ drivers/misc/mei/pci-me.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 6c0173772162..77f7dff7098d 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -81,6 +81,8 @@ #define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */ +#define MEI_DEV_ID_TGP_LP 0xA0E0 /* Tiger Lake Point LP */ + #define MEI_DEV_ID_MCC 0x4B70 /* Mule Creek Canyon (EHL) */ #define MEI_DEV_ID_MCC_4 0x4B75 /* Mule Creek Canyon 4 (EHL) */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 57cb68f5cc64..541538eff8b1 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -98,6 +98,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_MCC, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_MCC_4, MEI_ME_PCH8_CFG)}, -- cgit v1.2.1 From b9bc7b8b1e9e815b231c1ca0b566ee723f480987 Mon Sep 17 00:00:00 2001 From: Raul E Rangel Date: Tue, 27 Aug 2019 11:36:19 -0600 Subject: lkdtm/bugs: fix build error in lkdtm_EXHAUST_STACK lkdtm/bugs.c:94:2: error: format '%d' expects argument of type 'int', but argument 2 has type 'long unsigned int' [-Werror=format=] pr_info("Calling function with %d frame size to depth %d ...\n", ^ THREAD_SIZE is defined as an unsigned long, cast CONFIG_FRAME_WARN to unsigned long as well.
Fixes: 24cccab42c419 ("lkdtm/bugs: Adjust recursion test to avoid elision") Cc: stable Signed-off-by: Raul E Rangel Acked-by: Kees Cook Link: https://lore.kernel.org/r/20190827173619.170065-1-rrangel@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/lkdtm/bugs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c index 1606658b9b7e..24245ccdba72 100644 --- a/drivers/misc/lkdtm/bugs.c +++ b/drivers/misc/lkdtm/bugs.c @@ -22,7 +22,7 @@ struct lkdtm_list { * recurse past the end of THREAD_SIZE by default. */ #if defined(CONFIG_FRAME_WARN) && (CONFIG_FRAME_WARN > 0) -#define REC_STACK_SIZE (CONFIG_FRAME_WARN / 2) +#define REC_STACK_SIZE (_AC(CONFIG_FRAME_WARN, UL) / 2) #else #define REC_STACK_SIZE (THREAD_SIZE / 8) #endif @@ -91,7 +91,7 @@ void lkdtm_LOOP(void) void lkdtm_EXHAUST_STACK(void) { - pr_info("Calling function with %d frame size to depth %d ...\n", + pr_info("Calling function with %lu frame size to depth %d ...\n", REC_STACK_SIZE, recur_count); recursive_loop(recur_count); pr_info("FAIL: survived without exhausting stack?!\n"); -- cgit v1.2.1 From ddaedbbece90add970faeac87f7d7d40341936ce Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Aug 2019 23:25:05 +0200 Subject: Documentation/process: Embargoed hardware security issues To address the requirements of embargoed hardware issues, like Meltdown, Spectre, L1TF etc. it is necessary to define and document a process for handling embargoed hardware security issues. Following the discussion at the maintainer summit 2018 in Edinburgh (https://lwn.net/Articles/769417/) the volunteered people have worked out a process and a Memorandum of Understanding. The latter addresses the fact that the Linux kernel community cannot sign NDAs for various reasons. 
The initial contact point for hardware security issues is different from the regular kernel security contact to provide a known and neutral interface for hardware vendors and researchers. The initial primary contact team is proposed to be staffed by Linux Foundation Fellows, who are not associated to a vendor or a distribution and are well connected in the industry as a whole. The process is designed with the experience of the past incidents in mind and tries to address the remaining gaps, so future (hopefully rare) incidents can be handled more efficiently. It won't remove the fact, that most of this has to be done behind closed doors, but it is set up to avoid big bureaucratic hurdles for individual developers. The process is solely for handling hardware security issues and cannot be used for regular kernel (software only) security bugs. This memo can help with hardware companies who, and I quote, "[my manager] doesn't want to bet his job on the list keeping things secret." This despite numerous leaks directly from that company over the years, and none ever so far from the kernel security team. Cognitive dissonance seems to be a requirement to be a good manager. To accelerate the adoption of this process, we introduce the concept of ambassadors in participating companies. The ambassadors are there to guide people to comply with the process, but are not automatically involved in the disclosure of a particular incident.
Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Josh Poimboeuf Acked-by: Laura Abbott Acked-by: Ben Hutchings Reviewed-by: Tyler Hicks Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Jiri Kosina Link: https://lore.kernel.org/r/20190815212505.GC12041@kroah.com Signed-off-by: Greg Kroah-Hartman --- .../process/embargoed-hardware-issues.rst | 279 +++++++++++++++++++++ Documentation/process/index.rst | 1 + 2 files changed, 280 insertions(+) create mode 100644 Documentation/process/embargoed-hardware-issues.rst diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst new file mode 100644 index 000000000000..d37cbc502936 --- /dev/null +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -0,0 +1,279 @@ +Embargoed hardware issues +========================= + +Scope +----- + +Hardware issues which result in security problems are a different category +of security bugs than pure software bugs which only affect the Linux +kernel. + +Hardware issues like Meltdown, Spectre, L1TF etc. must be treated +differently because they usually affect all Operating Systems ("OS") and +therefore need coordination across different OS vendors, distributions, +hardware vendors and other parties. For some of the issues, software +mitigations can depend on microcode or firmware updates, which need further +coordination. + +.. _Contact: + +Contact +------- + +The Linux kernel hardware security team is separate from the regular Linux +kernel security team. + +The team only handles the coordination of embargoed hardware security +issues. Reports of pure software security bugs in the Linux kernel are not +handled by this team and the reporter will be guided to contact the regular +Linux kernel security team (:ref:`Documentation/admin-guide/ +<securitybugs>`) instead. + +The team can be contacted by email at <hardware-security@kernel.org>.
This +is a private list of security officers who will help you to coordinate an +issue according to our documented process. + +The list is encrypted and email to the list can be sent by either PGP or +S/MIME encrypted and must be signed with the reporter's PGP key or S/MIME +certificate. The list's PGP key and S/MIME certificate are available from +https://www.kernel.org/.... + +While hardware security issues are often handled by the affected hardware +vendor, we welcome contact from researchers or individuals who have +identified a potential hardware flaw. + +Hardware security officers +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The current team of hardware security officers: + + - Linus Torvalds (Linux Foundation Fellow) + - Greg Kroah-Hartman (Linux Foundation Fellow) + - Thomas Gleixner (Linux Foundation Fellow) + +Operation of mailing-lists +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The encrypted mailing-lists which are used in our process are hosted on +Linux Foundation's IT infrastructure. By providing this service Linux +Foundation's director of IT Infrastructure security technically has the +ability to access the embargoed information, but is obliged to +confidentiality by his employment contract. Linux Foundation's director of +IT Infrastructure security is also responsible for the kernel.org +infrastructure. + +The Linux Foundation's current director of IT Infrastructure security is +Konstantin Ryabitsev. + + +Non-disclosure agreements +------------------------- + +The Linux kernel hardware security team is not a formal body and therefore +unable to enter into any non-disclosure agreements. The kernel community +is aware of the sensitive nature of such issues and offers a Memorandum of +Understanding instead. 
+ + +Memorandum of Understanding +--------------------------- + +The Linux kernel community has a deep understanding of the requirement to +keep hardware security issues under embargo for coordination between +different OS vendors, distributors, hardware vendors and other parties. + +The Linux kernel community has successfully handled hardware security +issues in the past and has the necessary mechanisms in place to allow +community compliant development under embargo restrictions. + +The Linux kernel community has a dedicated hardware security team for +initial contact, which oversees the process of handling such issues under +embargo rules. + +The hardware security team identifies the developers (domain experts) who +will form the initial response team for a particular issue. The initial +response team can bring in further developers (domain experts) to address +the issue in the best technical way. + +All involved developers pledge to adhere to the embargo rules and to keep +the received information confidential. Violation of the pledge will lead to +immediate exclusion from the current issue and removal from all related +mailing-lists. In addition, the hardware security team will also exclude +the offender from future issues. The impact of this consequence is a highly +effective deterrent in our community. In case a violation happens the +hardware security team will inform the involved parties immediately. If you +or anyone becomes aware of a potential violation, please report it +immediately to the Hardware security officers. + + +Process +^^^^^^^ + +Due to the globally distributed nature of Linux kernel development, +face-to-face meetings are almost impossible to address hardware security +issues. Phone conferences are hard to coordinate due to time zones and +other factors and should be only used when absolutely necessary. Encrypted +email has been proven to be the most effective and secure communication +method for these types of issues. 
+ +Start of Disclosure +""""""""""""""""""" + +Disclosure starts by contacting the Linux kernel hardware security team by +email. This initial contact should contain a description of the problem and +a list of any known affected hardware. If your organization builds or +distributes the affected hardware, we encourage you to also consider what +other hardware could be affected. + +The hardware security team will provide an incident-specific encrypted +mailing-list which will be used for initial discussion with the reporter, +further disclosure and coordination. + +The hardware security team will provide the disclosing party a list of +developers (domain experts) who should be informed initially about the +issue after confirming with the developers that they will adhere to this +Memorandum of Understanding and the documented process. These developers +form the initial response team and will be responsible for handling the +issue after initial contact. The hardware security team is supporting the +response team, but is not necessarily involved in the mitigation +development process. + +While individual developers might be covered by a non-disclosure agreement +via their employer, they cannot enter individual non-disclosure agreements +in their role as Linux kernel developers. They will, however, agree to +adhere to this documented process and the Memorandum of Understanding. + + +Disclosure +"""""""""" + +The disclosing party provides detailed information to the initial response +team via the specific encrypted mailing-list. + +From our experience the technical documentation of these issues is usually +a sufficient starting point and further technical clarification is best +done via email. + +Mitigation development +"""""""""""""""""""""" + +The initial response team sets up an encrypted mailing-list or repurposes +an existing one if appropriate. 
The disclosing party should provide a list +of contacts for all other parties who have already been, or should be +informed about the issue. The response team contacts these parties so they +can name experts who should be subscribed to the mailing-list. + +Using a mailing-list is close to the normal Linux development process and +has been successfully used in developing mitigations for various hardware +security issues in the past. + +The mailing-list operates in the same way as normal Linux development. +Patches are posted, discussed and reviewed and if agreed on applied to a +non-public git repository which is only accessible to the participating +developers via a secure connection. The repository contains the main +development branch against the mainline kernel and backport branches for +stable kernel versions as necessary. + +The initial response team will identify further experts from the Linux +kernel developer community as needed and inform the disclosing party about +their participation. Bringing in experts can happen at any time of the +development process and often needs to be handled in a timely manner. + +Coordinated release +""""""""""""""""""" + +The involved parties will negotiate the date and time where the embargo +ends. At that point the prepared mitigations are integrated into the +relevant kernel trees and published. + +While we understand that hardware security issues need coordinated embargo +time, the embargo time should be constrained to the minimum time which is +required for all involved parties to develop, test and prepare the +mitigations. Extending embargo time artificially to meet conference talk +dates or other non-technical reasons is creating more work and burden for +the involved developers and response teams as the patches need to be kept +up to date in order to follow the ongoing upstream kernel development, +which might create conflicting changes. 
+ +CVE assignment +"""""""""""""" + +Neither the hardware security team nor the initial response team assigns +CVEs, nor are CVEs required for the development process. If CVEs are +provided by the disclosing party they can be used for documentation +purposes. + +Process ambassadors +------------------- + +For assistance with this process we have established ambassadors in various +organizations, who can answer questions about or provide guidance on the +reporting process and further handling. Ambassadors are not involved in the +disclosure of a particular issue, unless requested by a response team or by +an involved disclosed party. The current ambassadors list: + + ============= ======================================================== + ARM + AMD + IBM + Intel + Qualcomm + + Microsoft + VMware + XEN + + Canonical Tyler Hicks + Debian Ben Hutchings + Oracle Konrad Rzeszutek Wilk + Red Hat Josh Poimboeuf + SUSE Jiri Kosina + + Amazon + Google + ============= ======================================================== + +If you want your organization to be added to the ambassadors list, please +contact the hardware security team. The nominated ambassador has to +understand and support our process fully and is ideally well connected in +the Linux kernel community. + +Encrypted mailing-lists +----------------------- + +We use encrypted mailing-lists for communication. The operating principle +of these lists is that email sent to the list is encrypted either with the +list's PGP key or with the list's S/MIME certificate. The mailing-list +software decrypts the email and re-encrypts it individually for each +subscriber with the subscriber's PGP key or S/MIME certificate. Details +about the mailing-list software and the setup which is used to ensure the +security of the lists and protection of the data can be found here: +https://www.kernel.org/.... + +List keys +^^^^^^^^^ + +For initial contact see :ref:`Contact`.
For incident specific mailing-lists +the key and S/MIME certificate are conveyed to the subscribers by email +sent from the specific list. + +Subscription to incident specific lists +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Subscription is handled by the response teams. Disclosed parties who want +to participate in the communication send a list of potential subscribers to +the response team so the response team can validate subscription requests. + +Each subscriber needs to send a subscription request to the response team +by email. The email must be signed with the subscriber's PGP key or S/MIME +certificate. If a PGP key is used, it must be available from a public key +server and is ideally connected to the Linux kernel's PGP web of trust. See +also: https://www.kernel.org/signature.html. + +The response team verifies that the subscriber request is valid and adds +the subscriber to the list. After subscription the subscriber will receive +email from the mailing-list which is signed either with the list's PGP key +or the list's S/MIME certificate. The subscriber's email client can extract +the PGP key or the S/MIME certificate from the signature so the subscriber +can send encrypted email to the list. + diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst index 878ebfda7eef..e2c9ffc682c5 100644 --- a/Documentation/process/index.rst +++ b/Documentation/process/index.rst @@ -45,6 +45,7 @@ Other guides to the community that are of interest to most developers are: submit-checklist kernel-docs deprecated + embargoed-hardware-issues These are some overall technical guides that have been put here for now for lack of a better place. 
-- cgit v1.2.1 From a684d8fd87182090ee96e34519ecdf009cef093a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 22 Aug 2019 14:52:12 +0100 Subject: typec: tcpm: fix a typo in the comparison of pdo_max_voltage There appears to be a typo in the comparison of pdo_max_voltage[i] with the previous value, currently it is checking against the array pdo_min_voltage rather than pdo_max_voltage. I believe this is a typo. Fix this. Addresses-Coverity: ("Copy-paste error") Fixes: 5007e1b5db73 ("typec: tcpm: Validate source and sink caps") Cc: stable Signed-off-by: Colin Ian King Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20190822135212.10195-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 15abe1d9958f..bcfdb55fd198 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -1446,7 +1446,7 @@ static enum pdo_err tcpm_caps_err(struct tcpm_port *port, const u32 *pdo, else if ((pdo_min_voltage(pdo[i]) == pdo_min_voltage(pdo[i - 1])) && (pdo_max_voltage(pdo[i]) == - pdo_min_voltage(pdo[i - 1]))) + pdo_max_voltage(pdo[i - 1]))) return PDO_ERR_DUPE_PDO; break; /* -- cgit v1.2.1 From 76da906ad727048a74bb8067031ee99fc070c7da Mon Sep 17 00:00:00 2001 From: "Schmid, Carsten" Date: Fri, 23 Aug 2019 14:11:28 +0000 Subject: usb: hcd: use managed device resources Using managed device resources in usb_hcd_pci_probe() allows devm usage for resource subranges, such as the mmio resource for the platform device created to control host/device mode mux, which is a xhci extended capability, and sits inside the xhci mmio region. If managed device resources are not used then "parent" resource is released before subrange at driver removal as .remove callback is called before the devres list of resources for this device is walked and released. 
This has been observed with the xhci extended capability driver causing a use-after-free which is now fixed. An additional nice benefit is that error handling on driver initialisation is simplified much. Signed-off-by: Carsten Schmid Tested-by: Carsten Schmid Reviewed-by: Mathias Nyman Fixes: fa31b3cb2ae1 ("xhci: Add Intel extended cap / otg phy mux handling") Cc: # v4.19+ Link: https://lore.kernel.org/r/1566569488679.31808@mentor.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd-pci.c | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index 03432467b05f..7537681355f6 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -216,17 +216,18 @@ int usb_hcd_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) /* EHCI, OHCI */ hcd->rsrc_start = pci_resource_start(dev, 0); hcd->rsrc_len = pci_resource_len(dev, 0); - if (!request_mem_region(hcd->rsrc_start, hcd->rsrc_len, - driver->description)) { + if (!devm_request_mem_region(&dev->dev, hcd->rsrc_start, + hcd->rsrc_len, driver->description)) { dev_dbg(&dev->dev, "controller already in use\n"); retval = -EBUSY; goto put_hcd; } - hcd->regs = ioremap_nocache(hcd->rsrc_start, hcd->rsrc_len); + hcd->regs = devm_ioremap_nocache(&dev->dev, hcd->rsrc_start, + hcd->rsrc_len); if (hcd->regs == NULL) { dev_dbg(&dev->dev, "error mapping memory\n"); retval = -EFAULT; - goto release_mem_region; + goto put_hcd; } } else { @@ -240,8 +241,8 @@ int usb_hcd_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) hcd->rsrc_start = pci_resource_start(dev, region); hcd->rsrc_len = pci_resource_len(dev, region); - if (request_region(hcd->rsrc_start, hcd->rsrc_len, - driver->description)) + if (devm_request_region(&dev->dev, hcd->rsrc_start, + hcd->rsrc_len, driver->description)) break; } if (region == PCI_ROM_RESOURCE) { @@ -275,20 +276,13 @@ int usb_hcd_pci_probe(struct pci_dev *dev, const 
struct pci_device_id *id) } if (retval != 0) - goto unmap_registers; + goto put_hcd; device_wakeup_enable(hcd->self.controller); if (pci_dev_run_wake(dev)) pm_runtime_put_noidle(&dev->dev); return retval; -unmap_registers: - if (driver->flags & HCD_MEMORY) { - iounmap(hcd->regs); -release_mem_region: - release_mem_region(hcd->rsrc_start, hcd->rsrc_len); - } else - release_region(hcd->rsrc_start, hcd->rsrc_len); put_hcd: usb_put_hcd(hcd); disable_pci: @@ -347,14 +341,6 @@ void usb_hcd_pci_remove(struct pci_dev *dev) dev_set_drvdata(&dev->dev, NULL); up_read(&companions_rwsem); } - - if (hcd->driver->flags & HCD_MEMORY) { - iounmap(hcd->regs); - release_mem_region(hcd->rsrc_start, hcd->rsrc_len); - } else { - release_region(hcd->rsrc_start, hcd->rsrc_len); - } - usb_put_hcd(hcd); pci_disable_device(dev); } -- cgit v1.2.1 From a349b95d7ca0cea71be4a7dac29830703de7eb62 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 27 Aug 2019 12:51:50 +0900 Subject: usb: host: ohci: fix a race condition between shutdown and irq This patch fixes an issue that the following error is possible to happen when ohci hardware causes an interruption and the system is shutting down at the same time. 
[ 34.851754] usb 2-1: USB disconnect, device number 2 [ 35.166658] irq 156: nobody cared (try booting with the "irqpoll" option) [ 35.173445] CPU: 0 PID: 22 Comm: kworker/0:1 Not tainted 5.3.0-rc5 #85 [ 35.179964] Hardware name: Renesas Salvator-X 2nd version board based on r8a77965 (DT) [ 35.187886] Workqueue: usb_hub_wq hub_event [ 35.192063] Call trace: [ 35.194509] dump_backtrace+0x0/0x150 [ 35.198165] show_stack+0x14/0x20 [ 35.201475] dump_stack+0xa0/0xc4 [ 35.204785] __report_bad_irq+0x34/0xe8 [ 35.208614] note_interrupt+0x2cc/0x318 [ 35.212446] handle_irq_event_percpu+0x5c/0x88 [ 35.216883] handle_irq_event+0x48/0x78 [ 35.220712] handle_fasteoi_irq+0xb4/0x188 [ 35.224802] generic_handle_irq+0x24/0x38 [ 35.228804] __handle_domain_irq+0x5c/0xb0 [ 35.232893] gic_handle_irq+0x58/0xa8 [ 35.236548] el1_irq+0xb8/0x180 [ 35.239681] __do_softirq+0x94/0x23c [ 35.243253] irq_exit+0xd0/0xd8 [ 35.246387] __handle_domain_irq+0x60/0xb0 [ 35.250475] gic_handle_irq+0x58/0xa8 [ 35.254130] el1_irq+0xb8/0x180 [ 35.257268] kernfs_find_ns+0x5c/0x120 [ 35.261010] kernfs_find_and_get_ns+0x3c/0x60 [ 35.265361] sysfs_unmerge_group+0x20/0x68 [ 35.269454] dpm_sysfs_remove+0x2c/0x68 [ 35.273284] device_del+0x80/0x370 [ 35.276683] hid_destroy_device+0x28/0x60 [ 35.280686] usbhid_disconnect+0x4c/0x80 [ 35.284602] usb_unbind_interface+0x6c/0x268 [ 35.288867] device_release_driver_internal+0xe4/0x1b0 [ 35.293998] device_release_driver+0x14/0x20 [ 35.298261] bus_remove_device+0x110/0x128 [ 35.302350] device_del+0x148/0x370 [ 35.305832] usb_disable_device+0x8c/0x1d0 [ 35.309921] usb_disconnect+0xc8/0x2d0 [ 35.313663] hub_event+0x6e0/0x1128 [ 35.317146] process_one_work+0x1e0/0x320 [ 35.321148] worker_thread+0x40/0x450 [ 35.324805] kthread+0x124/0x128 [ 35.328027] ret_from_fork+0x10/0x18 [ 35.331594] handlers: [ 35.333862] [<0000000079300c1d>] usb_hcd_irq [ 35.338126] [<0000000079300c1d>] usb_hcd_irq [ 35.342389] Disabling IRQ #156 ohci_shutdown() disables all the interrupt and rh_state is set 
to OHCI_RH_HALTED. On the other hand, ohci_irq() may enable OHCI_INTR_SF and OHCI_INTR_MIE. Note that OHCI_INTR_SF can be set by start_ed_unlink(), which is called as follows: ohci_irq() -> process_done_list() -> takeback_td() -> start_ed_unlink() So, because ohci_irq() has the following condition, the issue happens when &ohci->regs->intrenable = OHCI_INTR_MIE | OHCI_INTR_SF and ohci->rh_state = OHCI_RH_HALTED: /* interrupt for some other device? */ if (ints == 0 || unlikely(ohci->rh_state == OHCI_RH_HALTED)) return IRQ_NOTMINE; To fix the issue, ohci_shutdown() holds the spin lock while disabling the interrupts and changing the rh_state flag, to prevent OHCI_INTR_MIE from being re-enabled unexpectedly. Note that io_watchdog_func() also calls ohci_shutdown() while already holding the spin lock, so the patch adds a new function, _ohci_shutdown(). This patch is inspired by a Renesas R-Car Gen3 BSP patch from Tho Vu. Signed-off-by: Yoshihiro Shimoda Cc: stable Acked-by: Alan Stern Link: https://lore.kernel.org/r/1566877910-6020-1-git-send-email-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-hcd.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index b457fdaff297..1fe3deec35cf 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -419,8 +419,7 @@ static void ohci_usb_reset (struct ohci_hcd *ohci) * other cases where the next software may expect clean state from the * "firmware". this is bus-neutral, unlike shutdown() methods.
*/ -static void -ohci_shutdown (struct usb_hcd *hcd) +static void _ohci_shutdown(struct usb_hcd *hcd) { struct ohci_hcd *ohci; @@ -436,6 +435,16 @@ ohci_shutdown (struct usb_hcd *hcd) ohci->rh_state = OHCI_RH_HALTED; } +static void ohci_shutdown(struct usb_hcd *hcd) +{ + struct ohci_hcd *ohci = hcd_to_ohci(hcd); + unsigned long flags; + + spin_lock_irqsave(&ohci->lock, flags); + _ohci_shutdown(hcd); + spin_unlock_irqrestore(&ohci->lock, flags); +} + /*-------------------------------------------------------------------------* * HC functions *-------------------------------------------------------------------------*/ @@ -760,7 +769,7 @@ static void io_watchdog_func(struct timer_list *t) died: usb_hc_died(ohci_to_hcd(ohci)); ohci_dump(ohci); - ohci_shutdown(ohci_to_hcd(ohci)); + _ohci_shutdown(ohci_to_hcd(ohci)); goto done; } else { /* No write back because the done queue was empty */ -- cgit v1.2.1 From f6445b6b2f2bb1745080af4a0926049e8bca2617 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 28 Aug 2019 01:34:49 +0800 Subject: USB: storage: ums-realtek: Update module parameter description for auto_delink_en The option named "auto_delink_en" is a bit misleading, as setting it to false doesn't really disable auto-delink but let auto-delink be firmware controlled. Update the description to reflect the real usage of this parameter. 
Signed-off-by: Kai-Heng Feng Cc: stable Link: https://lore.kernel.org/r/20190827173450.13572-1-kai.heng.feng@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/realtek_cr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/storage/realtek_cr.c b/drivers/usb/storage/realtek_cr.c index cc794e25a0b6..beaffac805af 100644 --- a/drivers/usb/storage/realtek_cr.c +++ b/drivers/usb/storage/realtek_cr.c @@ -38,7 +38,7 @@ MODULE_LICENSE("GPL"); static int auto_delink_en = 1; module_param(auto_delink_en, int, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(auto_delink_en, "enable auto delink"); +MODULE_PARM_DESC(auto_delink_en, "auto delink mode (0=firmware, 1=software [default])"); #ifdef CONFIG_REALTEK_AUTOPM static int ss_en = 1; -- cgit v1.2.1 From 1902a01e2bcc3abd7c9a18dc05e78c7ab4a53c54 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 28 Aug 2019 01:34:50 +0800 Subject: USB: storage: ums-realtek: Whitelist auto-delink support Auto-delink requires writing special registers to ums-realtek devices. Unconditionally enable auto-delink may break newer devices. So only enable auto-delink by default for the original three IDs, 0x0138, 0x0158 and 0x0159. Realtek is working on a patch to properly support auto-delink for other IDs. 
BugLink: https://bugs.launchpad.net/bugs/1838886 Signed-off-by: Kai-Heng Feng Acked-by: Alan Stern Cc: stable Link: https://lore.kernel.org/r/20190827173450.13572-2-kai.heng.feng@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/realtek_cr.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/usb/storage/realtek_cr.c b/drivers/usb/storage/realtek_cr.c index beaffac805af..1d9ce9cbc831 100644 --- a/drivers/usb/storage/realtek_cr.c +++ b/drivers/usb/storage/realtek_cr.c @@ -996,12 +996,15 @@ static int init_realtek_cr(struct us_data *us) goto INIT_FAIL; } - if (CHECK_FW_VER(chip, 0x5888) || CHECK_FW_VER(chip, 0x5889) || - CHECK_FW_VER(chip, 0x5901)) - SET_AUTO_DELINK(chip); - if (STATUS_LEN(chip) == 16) { - if (SUPPORT_AUTO_DELINK(chip)) + if (CHECK_PID(chip, 0x0138) || CHECK_PID(chip, 0x0158) || + CHECK_PID(chip, 0x0159)) { + if (CHECK_FW_VER(chip, 0x5888) || CHECK_FW_VER(chip, 0x5889) || + CHECK_FW_VER(chip, 0x5901)) SET_AUTO_DELINK(chip); + if (STATUS_LEN(chip) == 16) { + if (SUPPORT_AUTO_DELINK(chip)) + SET_AUTO_DELINK(chip); + } } #ifdef CONFIG_REALTEK_AUTOPM if (ss_en) -- cgit v1.2.1 From 993cc8753453fccfe060a535bbe21fcf1001b626 Mon Sep 17 00:00:00 2001 From: Nagarjuna Kristam Date: Wed, 28 Aug 2019 16:24:57 +0530 Subject: usb: host: xhci-tegra: Set DMA mask correctly The Falcon microcontroller that runs the XUSB firmware and which is responsible for exposing the XHCI interface can address only 40 bits of memory. Typically that's not a problem because Tegra devices don't have enough system memory to exceed those 40 bits. However, if the ARM SMMU is enabled on Tegra186 and later, the addresses passed to the XUSB controller can be anywhere in the 48-bit IOVA space of the ARM SMMU. Since the DMA/IOMMU API starts allocating from the top of the IOVA space, the Falcon microcontroller is not able to load the firmware successfully.
Fix this by setting the DMA mask to 40 bits, which will force the DMA API to map the buffer for the firmware to an IOVA that is addressable by the Falcon. Signed-off-by: Nagarjuna Kristam Signed-off-by: Thierry Reding Link: https://lore.kernel.org/r/1566989697-13049-1-git-send-email-nkristam@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-tegra.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index dafc65911fc0..2ff7c911fbd0 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -1194,6 +1194,16 @@ static int tegra_xusb_probe(struct platform_device *pdev) tegra_xusb_config(tegra, regs); + /* + * The XUSB Falcon microcontroller can only address 40 bits, so set + * the DMA mask accordingly. + */ + err = dma_set_mask_and_coherent(tegra->dev, DMA_BIT_MASK(40)); + if (err < 0) { + dev_err(&pdev->dev, "failed to set DMA mask: %d\n", err); + goto put_rpm; + } + err = tegra_xusb_load_firmware(tegra); if (err < 0) { dev_err(&pdev->dev, "failed to load firmware: %d\n", err); -- cgit v1.2.1 From 636bd02a7ba9025ff851d0cfb92768c8fa865859 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 27 Aug 2019 14:51:12 +0200 Subject: usb: host: xhci: rcar: Fix typo in compatible string matching It's spelled "renesas", not "renensas". Due to this typo, RZ/G1M and RZ/G1N were not covered by the check. 
Fixes: 2dc240a3308b ("usb: host: xhci: rcar: retire use of xhci_plat_type_is()") Signed-off-by: Geert Uytterhoeven Cc: stable Reviewed-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20190827125112.12192-1-geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-rcar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-rcar.c b/drivers/usb/host/xhci-rcar.c index 8616c52849c6..2b0ccd150209 100644 --- a/drivers/usb/host/xhci-rcar.c +++ b/drivers/usb/host/xhci-rcar.c @@ -104,7 +104,7 @@ static int xhci_rcar_is_gen2(struct device *dev) return of_device_is_compatible(node, "renesas,xhci-r8a7790") || of_device_is_compatible(node, "renesas,xhci-r8a7791") || of_device_is_compatible(node, "renesas,xhci-r8a7793") || - of_device_is_compatible(node, "renensas,rcar-gen2-xhci"); + of_device_is_compatible(node, "renesas,rcar-gen2-xhci"); } static int xhci_rcar_is_gen3(struct device *dev) -- cgit v1.2.1 From 1426bd2c9f7e3126e2678e7469dca9fd9fc6dd3e Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 27 Aug 2019 12:34:36 +0200 Subject: USB: cdc-wdm: fix race between write and disconnect due to flag abuse In case of a disconnect an ongoing flush() has to be made to fail. Nevertheless we cannot be sure that any pending URB has already finished, so although they will never succeed, they still must not be touched. The clean solution for this is to check for WDM_IN_USE and WDM_DISCONNECTING in flush(). There is no point in ever clearing WDM_IN_USE, as no further writes make sense. The issue is as old as the driver.
Fixes: afba937e540c9 ("USB: CDC WDM driver") Reported-by: syzbot+d232cca6ec42c2edb3fc@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Cc: stable Link: https://lore.kernel.org/r/20190827103436.21143-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index a7824a51f86d..70afb2ca1eab 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -587,10 +587,20 @@ static int wdm_flush(struct file *file, fl_owner_t id) { struct wdm_device *desc = file->private_data; - wait_event(desc->wait, !test_bit(WDM_IN_USE, &desc->flags)); + wait_event(desc->wait, + /* + * needs both flags. We cannot do with one + * because resetting it would cause a race + * with write() yet we need to signal + * a disconnect + */ + !test_bit(WDM_IN_USE, &desc->flags) || + test_bit(WDM_DISCONNECTING, &desc->flags)); /* cannot dereference desc->intf if WDM_DISCONNECTING */ - if (desc->werr < 0 && !test_bit(WDM_DISCONNECTING, &desc->flags)) + if (test_bit(WDM_DISCONNECTING, &desc->flags)) + return -ENODEV; + if (desc->werr < 0) dev_err(&desc->intf->dev, "Error in flush path: %d\n", desc->werr); @@ -974,8 +984,6 @@ static void wdm_disconnect(struct usb_interface *intf) spin_lock_irqsave(&desc->iuspin, flags); set_bit(WDM_DISCONNECTING, &desc->flags); set_bit(WDM_READ, &desc->flags); - /* to terminate pending flushes */ - clear_bit(WDM_IN_USE, &desc->flags); spin_unlock_irqrestore(&desc->iuspin, flags); wake_up_all(&desc->wait); mutex_lock(&desc->rlock); -- cgit v1.2.1 From ba03a9bbd17b149c373c0ea44017f35fc2cd0f28 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 20 Aug 2019 13:26:38 -0700 Subject: VMCI: Release resource if the work is already queued Francois reported that VMware balloon gets stuck after a balloon reset, when the VMCI doorbell is removed. 
A similar error can occur when the balloon driver is removed with the following splat: [ 1088.622000] INFO: task modprobe:3565 blocked for more than 120 seconds. [ 1088.622035] Tainted: G W 5.2.0 #4 [ 1088.622087] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 1088.622205] modprobe D 0 3565 1450 0x00000000 [ 1088.622210] Call Trace: [ 1088.622246] __schedule+0x2a8/0x690 [ 1088.622248] schedule+0x2d/0x90 [ 1088.622250] schedule_timeout+0x1d3/0x2f0 [ 1088.622252] wait_for_completion+0xba/0x140 [ 1088.622320] ? wake_up_q+0x80/0x80 [ 1088.622370] vmci_resource_remove+0xb9/0xc0 [vmw_vmci] [ 1088.622373] vmci_doorbell_destroy+0x9e/0xd0 [vmw_vmci] [ 1088.622379] vmballoon_vmci_cleanup+0x6e/0xf0 [vmw_balloon] [ 1088.622381] vmballoon_exit+0x18/0xcc8 [vmw_balloon] [ 1088.622394] __x64_sys_delete_module+0x146/0x280 [ 1088.622408] do_syscall_64+0x5a/0x130 [ 1088.622410] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 1088.622415] RIP: 0033:0x7f54f62791b7 [ 1088.622421] Code: Bad RIP value. [ 1088.622421] RSP: 002b:00007fff2a949008 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 [ 1088.622426] RAX: ffffffffffffffda RBX: 000055dff8b55d00 RCX: 00007f54f62791b7 [ 1088.622426] RDX: 0000000000000000 RSI: 0000000000000800 RDI: 000055dff8b55d68 [ 1088.622427] RBP: 000055dff8b55d00 R08: 00007fff2a947fb1 R09: 0000000000000000 [ 1088.622427] R10: 00007f54f62f5cc0 R11: 0000000000000206 R12: 000055dff8b55d68 [ 1088.622428] R13: 0000000000000001 R14: 000055dff8b55d68 R15: 00007fff2a94a3f0 The cause for the bug is that when the "delayed" doorbell is invoked, it takes a reference on the doorbell entry and schedules work that is supposed to run the appropriate code and drop the doorbell entry reference. The code ignores the fact that if the work is already queued, it will not be scheduled to run one more time. As a result one of the references would not be dropped. When the code waits for the reference to get to zero, during balloon reset or module removal, it gets stuck. 
Fix it. Drop the reference if schedule_work() indicates that the work is already queued. Note that this bug got more apparent (or apparent at all) due to commit ce664331b248 ("vmw_balloon: VMCI_DOORBELL_SET does not check status"). Fixes: 83e2ec765be03 ("VMCI: doorbell implementation.") Reported-by: Francois Rigault Cc: Jorgen Hansen Cc: Adit Ranadive Cc: Alexios Zavras Cc: Vishnu DASA Cc: stable@vger.kernel.org Signed-off-by: Nadav Amit Reviewed-by: Vishnu Dasa Link: https://lore.kernel.org/r/20190820202638.49003-1-namit@vmware.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_vmci/vmci_doorbell.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.c b/drivers/misc/vmw_vmci/vmci_doorbell.c index bad89b6e0802..345addd9306d 100644 --- a/drivers/misc/vmw_vmci/vmci_doorbell.c +++ b/drivers/misc/vmw_vmci/vmci_doorbell.c @@ -310,7 +310,8 @@ int vmci_dbell_host_context_notify(u32 src_cid, struct vmci_handle handle) entry = container_of(resource, struct dbell_entry, resource); if (entry->run_delayed) { - schedule_work(&entry->work); + if (!schedule_work(&entry->work)) + vmci_resource_put(resource); } else { entry->notify_cb(entry->client_data); vmci_resource_put(resource); @@ -361,7 +362,8 @@ static void dbell_fire_entries(u32 notify_idx) atomic_read(&dbell->active) == 1) { if (dbell->run_delayed) { vmci_resource_get(&dbell->resource); - schedule_work(&dbell->work); + if (!schedule_work(&dbell->work)) + vmci_resource_put(&dbell->resource); } else { dbell->notify_cb(dbell->client_data); } -- cgit v1.2.1 From 468e0ffac803f5557a62a33b72ef8844d1bfdc08 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 20 Aug 2019 09:01:21 -0700 Subject: vmw_balloon: Fix offline page marking with compaction The compaction code already marks pages as offline when it enqueues pages in the ballooned page list, and removes the mapping when the pages are removed from the list. 
VMware balloon also updates the flags, instead of letting the balloon-compaction logic handle it, which causes the assertion VM_BUG_ON_PAGE(!PageOffline(page)) to fire, when __ClearPageOffline is called the second time. This causes the following crash. [ 487.104520] kernel BUG at include/linux/page-flags.h:749! [ 487.106364] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC PTI [ 487.107681] CPU: 7 PID: 1106 Comm: kworker/7:3 Not tainted 5.3.0-rc5balloon #227 [ 487.109196] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 12/12/2018 [ 487.111452] Workqueue: events_freezable vmballoon_work [vmw_balloon] [ 487.112779] RIP: 0010:vmballoon_release_page_list+0xaa/0x100 [vmw_balloon] [ 487.114200] Code: fe 48 c1 e7 06 4c 01 c7 8b 47 30 41 89 c1 41 81 e1 00 01 00 f0 41 81 f9 00 00 00 f0 74 d3 48 c7 c6 08 a1 a1 c0 e8 06 0d e7 ea <0f> 0b 44 89 f6 4c 89 c7 e8 49 9c e9 ea 49 8d 75 08 49 8b 45 08 4d [ 487.118033] RSP: 0018:ffffb82f012bbc98 EFLAGS: 00010246 [ 487.119135] RAX: 0000000000000037 RBX: 0000000000000001 RCX: 0000000000000006 [ 487.120601] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9a85b6bd7620 [ 487.122071] RBP: ffffb82f012bbcc0 R08: 0000000000000001 R09: 0000000000000000 [ 487.123536] R10: 0000000000000000 R11: 0000000000000000 R12: ffffb82f012bbd00 [ 487.125002] R13: ffffe97f4598d9c0 R14: 0000000000000000 R15: ffffb82f012bbd34 [ 487.126463] FS: 0000000000000000(0000) GS:ffff9a85b6bc0000(0000) knlGS:0000000000000000 [ 487.128110] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 487.129316] CR2: 00007ffe6e413ea0 CR3: 0000000230b18001 CR4: 00000000003606e0 [ 487.130812] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 487.132283] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 487.133749] Call Trace: [ 487.134333] vmballoon_deflate+0x22c/0x390 [vmw_balloon] [ 487.135468] vmballoon_work+0x6e7/0x913 [vmw_balloon] [ 487.136711] ? 
process_one_work+0x21a/0x5e0 [ 487.138581] process_one_work+0x298/0x5e0 [ 487.139926] ? vmballoon_migratepage+0x310/0x310 [vmw_balloon] [ 487.141610] ? process_one_work+0x298/0x5e0 [ 487.143053] worker_thread+0x41/0x400 [ 487.144389] kthread+0x12b/0x150 [ 487.145582] ? process_one_work+0x5e0/0x5e0 [ 487.146937] ? kthread_create_on_node+0x60/0x60 [ 487.148637] ret_from_fork+0x3a/0x50 Fix it by updating the PageOffline indication only when a 2MB page is enqueued and dequeued. The 4KB pages will be handled correctly by the balloon compaction logic. Fixes: 83a8afa72e9c ("vmw_balloon: Compaction support") Cc: David Hildenbrand Reported-by: Thomas Hellstrom Signed-off-by: Nadav Amit Link: https://lore.kernel.org/r/20190820160121.452-1-namit@vmware.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_balloon.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 8840299420e0..5e6be1527571 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -691,7 +691,6 @@ static int vmballoon_alloc_page_list(struct vmballoon *b, } if (page) { - vmballoon_mark_page_offline(page, ctl->page_size); /* Success. Add the page to the list and continue. */ list_add(&page->lru, &ctl->pages); continue; @@ -930,7 +929,6 @@ static void vmballoon_release_page_list(struct list_head *page_list, list_for_each_entry_safe(page, tmp, page_list, lru) { list_del(&page->lru); - vmballoon_mark_page_online(page, page_size); __free_pages(page, vmballoon_page_order(page_size)); } @@ -1005,6 +1003,7 @@ static void vmballoon_enqueue_page_list(struct vmballoon *b, enum vmballoon_page_size_type page_size) { unsigned long flags; + struct page *page; if (page_size == VMW_BALLOON_4K_PAGE) { balloon_page_list_enqueue(&b->b_dev_info, pages); @@ -1014,6 +1013,11 @@ static void vmballoon_enqueue_page_list(struct vmballoon *b, * for the balloon compaction mechanism. 
*/ spin_lock_irqsave(&b->b_dev_info.pages_lock, flags); + + list_for_each_entry(page, pages, lru) { + vmballoon_mark_page_offline(page, VMW_BALLOON_2M_PAGE); + } + list_splice_init(pages, &b->huge_pages); __count_vm_events(BALLOON_INFLATE, *n_pages * vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE)); @@ -1056,6 +1060,8 @@ static void vmballoon_dequeue_page_list(struct vmballoon *b, /* 2MB pages */ spin_lock_irqsave(&b->b_dev_info.pages_lock, flags); list_for_each_entry_safe(page, tmp, &b->huge_pages, lru) { + vmballoon_mark_page_online(page, VMW_BALLOON_2M_PAGE); + list_move(&page->lru, pages); if (++i == n_req_pages) break; -- cgit v1.2.1 From 8919dfcb31161fae7d607bbef5247e5e82fd6457 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 27 Aug 2019 12:12:49 +0800 Subject: fsi: scom: Don't abort operations for minor errors The scom driver currently fails out of operations if certain system errors are flagged in the status register; system checkstop, special attention, or recoverable error. These errors won't impact the ability of the scom engine to perform operations, so the driver should continue under these conditions. Also, don't do a PIB reset for these conditions, since it won't help. 
Fixes: 6b293258cded ("fsi: scom: Major overhaul") Signed-off-by: Eddie James Cc: stable Acked-by: Jeremy Kerr Acked-by: Benjamin Herrenschmidt Signed-off-by: Joel Stanley Link: https://lore.kernel.org/r/20190827041249.13381-1-jk@ozlabs.org Signed-off-by: Greg Kroah-Hartman --- drivers/fsi/fsi-scom.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/fsi/fsi-scom.c b/drivers/fsi/fsi-scom.c index 343153d47e5b..004dc03ccf09 100644 --- a/drivers/fsi/fsi-scom.c +++ b/drivers/fsi/fsi-scom.c @@ -38,8 +38,7 @@ #define SCOM_STATUS_PIB_RESP_MASK 0x00007000 #define SCOM_STATUS_PIB_RESP_SHIFT 12 -#define SCOM_STATUS_ANY_ERR (SCOM_STATUS_ERR_SUMMARY | \ - SCOM_STATUS_PROTECTION | \ +#define SCOM_STATUS_ANY_ERR (SCOM_STATUS_PROTECTION | \ SCOM_STATUS_PARITY | \ SCOM_STATUS_PIB_ABORT | \ SCOM_STATUS_PIB_RESP_MASK) @@ -251,11 +250,6 @@ static int handle_fsi2pib_status(struct scom_device *scom, uint32_t status) /* Return -EBUSY on PIB abort to force a retry */ if (status & SCOM_STATUS_PIB_ABORT) return -EBUSY; - if (status & SCOM_STATUS_ERR_SUMMARY) { - fsi_device_write(scom->fsi_dev, SCOM_FSI2PIB_RESET_REG, &dummy, - sizeof(uint32_t)); - return -EIO; - } return 0; } -- cgit v1.2.1 From abecec415dc2b5a5d11fe1b17963564b13f2ff20 Mon Sep 17 00:00:00 2001 From: Yash Shah Date: Tue, 27 Aug 2019 10:36:03 +0530 Subject: macb: bindings doc: update sifive fu540-c000 binding As per the discussion with Nicolas Ferre[0], rename the compatible property to a more appropriate and specific string. [0] https://lore.kernel.org/netdev/CAJ2_jOFEVZQat0Yprg4hem4jRrqkB72FKSeQj4p8P5KA-+rgww@mail.gmail.com/ Signed-off-by: Yash Shah Acked-by: Nicolas Ferre Reviewed-by: Paul Walmsley Reviewed-by: Rob Herring Signed-off-by: David S. 
Miller --- Documentation/devicetree/bindings/net/macb.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt index 63c73fafe26d..0b61a90f1592 100644 --- a/Documentation/devicetree/bindings/net/macb.txt +++ b/Documentation/devicetree/bindings/net/macb.txt @@ -15,10 +15,10 @@ Required properties: Use "atmel,sama5d4-gem" for the GEM IP (10/100) available on Atmel sama5d4 SoCs. Use "cdns,zynq-gem" Xilinx Zynq-7xxx SoC. Use "cdns,zynqmp-gem" for Zynq Ultrascale+ MPSoC. - Use "sifive,fu540-macb" for SiFive FU540-C000 SoC. + Use "sifive,fu540-c000-gem" for SiFive FU540-C000 SoC. Or the generic form: "cdns,emac". - reg: Address and length of the register set for the device - For "sifive,fu540-macb", second range is required to specify the + For "sifive,fu540-c000-gem", second range is required to specify the address and length of the registers for GEMGXL Management block. - interrupts: Should contain macb interrupt - phy-mode: See ethernet.txt file in the same directory. -- cgit v1.2.1 From 6342ea886771d65d718d07049c75b25b27e60cfd Mon Sep 17 00:00:00 2001 From: Yash Shah Date: Tue, 27 Aug 2019 10:36:04 +0530 Subject: macb: Update compatibility string for SiFive FU540-C000 Update the compatibility string for SiFive FU540-C000 as per the new string updated in the binding doc. Reference: https://lore.kernel.org/netdev/CAJ2_jOFEVZQat0Yprg4hem4jRrqkB72FKSeQj4p8P5KA-+rgww@mail.gmail.com/ Signed-off-by: Yash Shah Acked-by: Nicolas Ferre Reviewed-by: Paul Walmsley Tested-by: Paul Walmsley Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 5ca17e62dc3e..35b59b5edf0f 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4154,7 +4154,7 @@ static const struct of_device_id macb_dt_ids[] = { { .compatible = "cdns,emac", .data = &emac_config }, { .compatible = "cdns,zynqmp-gem", .data = &zynqmp_config}, { .compatible = "cdns,zynq-gem", .data = &zynq_config }, - { .compatible = "sifive,fu540-macb", .data = &fu540_c000_config }, + { .compatible = "sifive,fu540-c000-gem", .data = &fu540_c000_config }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, macb_dt_ids); -- cgit v1.2.1 From 04d37cf46a773910f75fefaa9f9488f42bfe1fe2 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 27 Aug 2019 12:29:09 +0200 Subject: net/sched: pfifo_fast: fix wrong dereference when qdisc is reset Now that 'TCQ_F_CPUSTATS' bit can be cleared, depending on the value of 'TCQ_F_NOLOCK' bit in the parent qdisc, we need to be sure that per-cpu counters are present when 'reset()' is called for pfifo_fast qdiscs. Otherwise, the following script: # tc q a dev lo handle 1: root htb default 100 # tc c a dev lo parent 1: classid 1:100 htb \ > rate 95Mbit ceil 100Mbit burst 64k [...] # tc f a dev lo parent 1: protocol arp basic classid 1:100 [...] # tc q a dev lo parent 1:100 handle 100: pfifo_fast [...] # tc q d dev lo root can generate the following splat: Unable to handle kernel paging request at virtual address dfff2c01bd148000 Mem abort info: ESR = 0x96000004 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 [dfff2c01bd148000] address between user and kernel address ranges Internal error: Oops: 96000004 [#1] SMP [...] 
pstate: 80000005 (Nzcv daif -PAN -UAO) pc : pfifo_fast_reset+0x280/0x4d8 lr : pfifo_fast_reset+0x21c/0x4d8 sp : ffff800d09676fa0 x29: ffff800d09676fa0 x28: ffff200012ee22e4 x27: dfff200000000000 x26: 0000000000000000 x25: ffff800ca0799958 x24: ffff1001940f332b x23: 0000000000000007 x22: ffff200012ee1ab8 x21: 0000600de8a40000 x20: 0000000000000000 x19: ffff800ca0799900 x18: 0000000000000000 x17: 0000000000000002 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000000 x12: ffff1001b922e6e2 x11: 1ffff001b922e6e1 x10: 0000000000000000 x9 : 1ffff001b922e6e1 x8 : dfff200000000000 x7 : 0000000000000000 x6 : 0000000000000000 x5 : 1fffe400025dc45c x4 : 1fffe400025dc357 x3 : 00000c01bd148000 x2 : 0000600de8a40000 x1 : 0000000000000007 x0 : 0000600de8a40004 Call trace: pfifo_fast_reset+0x280/0x4d8 qdisc_reset+0x6c/0x370 htb_reset+0x150/0x3b8 [sch_htb] qdisc_reset+0x6c/0x370 dev_deactivate_queue.constprop.5+0xe0/0x1a8 dev_deactivate_many+0xd8/0x908 dev_deactivate+0xe4/0x190 qdisc_graft+0x88c/0xbd0 tc_get_qdisc+0x418/0x8a8 rtnetlink_rcv_msg+0x3a8/0xa78 netlink_rcv_skb+0x18c/0x328 rtnetlink_rcv+0x28/0x38 netlink_unicast+0x3c4/0x538 netlink_sendmsg+0x538/0x9a0 sock_sendmsg+0xac/0xf8 ___sys_sendmsg+0x53c/0x658 __sys_sendmsg+0xc8/0x140 __arm64_sys_sendmsg+0x74/0xa8 el0_svc_handler+0x164/0x468 el0_svc+0x10/0x14 Code: 910012a0 92400801 d343fc03 11000c21 (38fb6863) Fix this by testing the value of 'TCQ_F_CPUSTATS' bit in 'qdisc->flags', before dereferencing 'qdisc->cpu_qstats'. Changes since v1: - coding style improvements, thanks to Stefano Brivio Fixes: 8a53e616de29 ("net: sched: when clearing NOLOCK, clear TCQ_F_CPUSTATS, too") CC: Paolo Abeni Reported-by: Li Shuang Signed-off-by: Davide Caratti Acked-by: Paolo Abeni Reviewed-by: Stefano Brivio Signed-off-by: David S. 
Miller --- net/sched/sch_generic.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 11c03cf4aa74..099797e5409d 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -688,11 +688,14 @@ static void pfifo_fast_reset(struct Qdisc *qdisc) kfree_skb(skb); } - for_each_possible_cpu(i) { - struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i); + if (qdisc_is_percpu_stats(qdisc)) { + for_each_possible_cpu(i) { + struct gnet_stats_queue *q; - q->backlog = 0; - q->qlen = 0; + q = per_cpu_ptr(qdisc->cpu_qstats, i); + q->backlog = 0; + q->qlen = 0; + } } } -- cgit v1.2.1 From a84d016479896b5526a2cc54784e6ffc41c9d6f6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Aug 2019 03:33:12 -0700 Subject: mld: fix memory leak in mld_del_delrec() Similar to the fix done for IPv4 in commit e5b1c6c6277d ("igmp: fix memory leak in igmpv3_del_delrec()"), we need to make sure mca_tomb and mca_sources are not blindly overwritten. Using swap() then a call to ip6_mc_clear_src() will take care of the missing free. BUG: memory leak unreferenced object 0xffff888117d9db00 (size 64): comm "syz-executor247", pid 6918, jiffies 4294943989 (age 25.350s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 fe 88 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
backtrace: [<000000005b463030>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] [<000000005b463030>] slab_post_alloc_hook mm/slab.h:522 [inline] [<000000005b463030>] slab_alloc mm/slab.c:3319 [inline] [<000000005b463030>] kmem_cache_alloc_trace+0x145/0x2c0 mm/slab.c:3548 [<00000000939cbf94>] kmalloc include/linux/slab.h:552 [inline] [<00000000939cbf94>] kzalloc include/linux/slab.h:748 [inline] [<00000000939cbf94>] ip6_mc_add1_src net/ipv6/mcast.c:2236 [inline] [<00000000939cbf94>] ip6_mc_add_src+0x31f/0x420 net/ipv6/mcast.c:2356 [<00000000d8972221>] ip6_mc_source+0x4a8/0x600 net/ipv6/mcast.c:449 [<000000002b203d0d>] do_ipv6_setsockopt.isra.0+0x1b92/0x1dd0 net/ipv6/ipv6_sockglue.c:748 [<000000001f1e2d54>] ipv6_setsockopt+0x89/0xd0 net/ipv6/ipv6_sockglue.c:944 [<00000000c8f7bdf9>] udpv6_setsockopt+0x4e/0x90 net/ipv6/udp.c:1558 [<000000005a9a0c5e>] sock_common_setsockopt+0x38/0x50 net/core/sock.c:3139 [<00000000910b37b2>] __sys_setsockopt+0x10f/0x220 net/socket.c:2084 [<00000000e9108023>] __do_sys_setsockopt net/socket.c:2100 [inline] [<00000000e9108023>] __se_sys_setsockopt net/socket.c:2097 [inline] [<00000000e9108023>] __x64_sys_setsockopt+0x26/0x30 net/socket.c:2097 [<00000000f4818160>] do_syscall_64+0x76/0x1a0 arch/x86/entry/common.c:296 [<000000008d367e8f>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 1666d49e1d41 ("mld: do not remove mld souce list info when set link down") Fixes: 9c8bb163ae78 ("igmp, mld: Fix memory leak in igmpv3/mld_del_delrec()") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. 
Miller --- net/ipv6/mcast.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 7f3f13c37916..eaa4c2cc2fbb 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -787,14 +787,15 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) if (pmc) { im->idev = pmc->idev; if (im->mca_sfmode == MCAST_INCLUDE) { - im->mca_tomb = pmc->mca_tomb; - im->mca_sources = pmc->mca_sources; + swap(im->mca_tomb, pmc->mca_tomb); + swap(im->mca_sources, pmc->mca_sources); for (psf = im->mca_sources; psf; psf = psf->sf_next) psf->sf_crcount = idev->mc_qrv; } else { im->mca_crcount = idev->mc_qrv; } in6_dev_put(pmc->idev); + ip6_mc_clear_src(pmc); kfree(pmc); } spin_unlock_bh(&im->mca_lock); -- cgit v1.2.1 From ad06a566e118e57b852cab5933dbbbaebb141de3 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 27 Aug 2019 07:58:09 -0700 Subject: openvswitch: Properly set L4 keys on "later" IP fragments When IP fragments are reassembled before being sent to conntrack, the key from the last fragment is used. Unless there are reordering issues, the last fragment received will not contain the L4 ports, so the key for the reassembled datagram won't contain them. This patch updates the key once we have a reassembled datagram. The handle_fragments() function works on L3 headers so we pull the L3/L4 flow key update code from key_extract into a new function 'key_extract_l3l4'. Then we add another new function ovs_flow_key_update_l3l4() and export it so that it is accessible by handle_fragments() for conntrack packet reassembly. Co-authored-by: Justin Pettit Signed-off-by: Greg Rose Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/openvswitch/conntrack.c | 5 ++ net/openvswitch/flow.c | 155 +++++++++++++++++++++++++------------------- net/openvswitch/flow.h | 1 + 3 files changed, 95 insertions(+), 66 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index d8da6477d6be..05249eb45082 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -525,6 +525,11 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, return -EPFNOSUPPORT; } + /* The key extracted from the fragment that completed this datagram + * likely didn't have an L4 header, so regenerate it. + */ + ovs_flow_key_update_l3l4(skb, key); + key->ip.frag = OVS_FRAG_TYPE_NONE; skb_clear_hash(skb); skb->ignore_df = 1; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index bc89e16e0505..005f7622edac 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -523,78 +523,15 @@ static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key) } /** - * key_extract - extracts a flow key from an Ethernet frame. + * key_extract_l3l4 - extracts L3/L4 header information. * @skb: sk_buff that contains the frame, with skb->data pointing to the - * Ethernet header + * L3 header * @key: output flow key * - * The caller must ensure that skb->len >= ETH_HLEN. - * - * Returns 0 if successful, otherwise a negative errno value. - * - * Initializes @skb header fields as follows: - * - * - skb->mac_header: the L2 header. - * - * - skb->network_header: just past the L2 header, or just past the - * VLAN header, to the first byte of the L2 payload. - * - * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6 - * on output, then just past the IP header, if one is present and - * of a correct length, otherwise the same as skb->network_header. - * For other key->eth.type values it is left untouched. - * - * - skb->protocol: the type of the data starting at skb->network_header. - * Equals to key->eth.type. 
*/ -static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) +static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key) { int error; - struct ethhdr *eth; - - /* Flags are always used as part of stats */ - key->tp.flags = 0; - - skb_reset_mac_header(skb); - - /* Link layer. */ - clear_vlan(key); - if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { - if (unlikely(eth_type_vlan(skb->protocol))) - return -EINVAL; - - skb_reset_network_header(skb); - key->eth.type = skb->protocol; - } else { - eth = eth_hdr(skb); - ether_addr_copy(key->eth.src, eth->h_source); - ether_addr_copy(key->eth.dst, eth->h_dest); - - __skb_pull(skb, 2 * ETH_ALEN); - /* We are going to push all headers that we pull, so no need to - * update skb->csum here. - */ - - if (unlikely(parse_vlan(skb, key))) - return -ENOMEM; - - key->eth.type = parse_ethertype(skb); - if (unlikely(key->eth.type == htons(0))) - return -ENOMEM; - - /* Multiple tagged packets need to retain TPID to satisfy - * skb_vlan_pop(), which will later shift the ethertype into - * skb->protocol. - */ - if (key->eth.cvlan.tci & htons(VLAN_CFI_MASK)) - skb->protocol = key->eth.cvlan.tpid; - else - skb->protocol = key->eth.type; - - skb_reset_network_header(skb); - __skb_push(skb, skb->data - skb_mac_header(skb)); - } - skb_reset_mac_len(skb); /* Network layer. */ if (key->eth.type == htons(ETH_P_IP)) { @@ -788,6 +725,92 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) return 0; } +/** + * key_extract - extracts a flow key from an Ethernet frame. + * @skb: sk_buff that contains the frame, with skb->data pointing to the + * Ethernet header + * @key: output flow key + * + * The caller must ensure that skb->len >= ETH_HLEN. + * + * Returns 0 if successful, otherwise a negative errno value. + * + * Initializes @skb header fields as follows: + * + * - skb->mac_header: the L2 header. 
+ * + * - skb->network_header: just past the L2 header, or just past the + * VLAN header, to the first byte of the L2 payload. + * + * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6 + * on output, then just past the IP header, if one is present and + * of a correct length, otherwise the same as skb->network_header. + * For other key->eth.type values it is left untouched. + * + * - skb->protocol: the type of the data starting at skb->network_header. + * Equals to key->eth.type. + */ +static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) +{ + struct ethhdr *eth; + + /* Flags are always used as part of stats */ + key->tp.flags = 0; + + skb_reset_mac_header(skb); + + /* Link layer. */ + clear_vlan(key); + if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { + if (unlikely(eth_type_vlan(skb->protocol))) + return -EINVAL; + + skb_reset_network_header(skb); + key->eth.type = skb->protocol; + } else { + eth = eth_hdr(skb); + ether_addr_copy(key->eth.src, eth->h_source); + ether_addr_copy(key->eth.dst, eth->h_dest); + + __skb_pull(skb, 2 * ETH_ALEN); + /* We are going to push all headers that we pull, so no need to + * update skb->csum here. + */ + + if (unlikely(parse_vlan(skb, key))) + return -ENOMEM; + + key->eth.type = parse_ethertype(skb); + if (unlikely(key->eth.type == htons(0))) + return -ENOMEM; + + /* Multiple tagged packets need to retain TPID to satisfy + * skb_vlan_pop(), which will later shift the ethertype into + * skb->protocol. + */ + if (key->eth.cvlan.tci & htons(VLAN_CFI_MASK)) + skb->protocol = key->eth.cvlan.tpid; + else + skb->protocol = key->eth.type; + + skb_reset_network_header(skb); + __skb_push(skb, skb->data - skb_mac_header(skb)); + } + + skb_reset_mac_len(skb); + + /* Fill out L3/L4 key info, if any */ + return key_extract_l3l4(skb, key); +} + +/* In the case of conntrack fragment handling it expects L3 headers, + * add a helper. 
+ */ +int ovs_flow_key_update_l3l4(struct sk_buff *skb, struct sw_flow_key *key) +{ + return key_extract_l3l4(skb, key); +} + int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key) { int res; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index a5506e2d4b7a..b830d5ff7af4 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -270,6 +270,7 @@ void ovs_flow_stats_clear(struct sw_flow *); u64 ovs_flow_used_time(unsigned long flow_jiffies); int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key); +int ovs_flow_key_update_l3l4(struct sk_buff *skb, struct sw_flow_key *key); int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key); -- cgit v1.2.1 From 0754b4e8cdf3eec6e4122e79af26ed9bab20f8f8 Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Tue, 27 Aug 2019 07:58:10 -0700 Subject: openvswitch: Clear the L4 portion of the key for "later" fragments. Only the first fragment in a datagram contains the L4 headers. When the Open vSwitch module parses a packet, it always sets the IP protocol field in the key, but can only set the L4 fields on the first fragment. The original behavior would not clear the L4 portion of the key, so garbage values would be sent in the key for "later" fragments. This patch clears the L4 fields in that circumstance to prevent sending those garbage values as part of the upcall. Signed-off-by: Justin Pettit Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/openvswitch/flow.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 005f7622edac..9d81d2c7bf82 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -560,6 +560,7 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key) offset = nh->frag_off & htons(IP_OFFSET); if (offset) { key->ip.frag = OVS_FRAG_TYPE_LATER; + memset(&key->tp, 0, sizeof(key->tp)); return 0; } if (nh->frag_off & htons(IP_MF) || @@ -677,8 +678,10 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key) return error; } - if (key->ip.frag == OVS_FRAG_TYPE_LATER) + if (key->ip.frag == OVS_FRAG_TYPE_LATER) { + memset(&key->tp, 0, sizeof(key->tp)); return 0; + } if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) key->ip.frag = OVS_FRAG_TYPE_FIRST; -- cgit v1.2.1 From a256f2e329df0773022d28df2c3d206b9aaf1e61 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 19 Aug 2019 05:14:23 +0000 Subject: RISC-V: Fix FIXMAP area corruption on RV32 systems Currently, various virtual memory areas of Linux RISC-V are organized in increasing order of their virtual addresses as follows: 1. User space area (This is lowest area and starts at 0x0) 2. FIXMAP area 3. VMALLOC area 4. Kernel area (This is highest area and starts at PAGE_OFFSET) The maximum size of user space area is represented by TASK_SIZE. On RV32 systems, TASK_SIZE is defined as VMALLOC_START which causes the user space area to overlap the FIXMAP area. This allows user space apps to potentially corrupt the FIXMAP area and kernel OF APIs will crash whenever they access corrupted FDT in the FIXMAP area. On RV64 systems, TASK_SIZE is set to fixed 256GB and no other areas happen to overlap so we don't see any FIXMAP area corruptions. This patch fixes FIXMAP area corruption on RV32 systems by setting TASK_SIZE to FIXADDR_START. 
We also move FIXADDR_TOP, FIXADDR_SIZE, and FIXADDR_START defines to asm/pgtable.h so that we can avoid cyclic header includes. Signed-off-by: Anup Patel Tested-by: Alistair Francis Reviewed-by: Christoph Hellwig Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/fixmap.h | 4 ---- arch/riscv/include/asm/pgtable.h | 12 ++++++++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h index 9c66033c3a54..161f28d04a07 100644 --- a/arch/riscv/include/asm/fixmap.h +++ b/arch/riscv/include/asm/fixmap.h @@ -30,10 +30,6 @@ enum fixed_addresses { __end_of_fixed_addresses }; -#define FIXADDR_SIZE (__end_of_fixed_addresses * PAGE_SIZE) -#define FIXADDR_TOP (VMALLOC_START) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) - #define FIXMAP_PAGE_IO PAGE_KERNEL #define __early_set_fixmap __set_fixmap diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index a364aba23d55..c24a083b3e12 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -420,14 +420,22 @@ static inline void pgtable_cache_init(void) #define VMALLOC_END (PAGE_OFFSET - 1) #define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) +#define FIXADDR_TOP VMALLOC_START +#ifdef CONFIG_64BIT +#define FIXADDR_SIZE PMD_SIZE +#else +#define FIXADDR_SIZE PGDIR_SIZE +#endif +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) + /* - * Task size is 0x4000000000 for RV64 or 0xb800000 for RV32. + * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32. * Note that PGDIR_SIZE must evenly divide TASK_SIZE. 
*/ #ifdef CONFIG_64BIT #define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2) #else -#define TASK_SIZE VMALLOC_START +#define TASK_SIZE FIXADDR_START #endif #include -- cgit v1.2.1 From 5b3efa4f1479c91cb8361acef55f9c6662feba57 Mon Sep 17 00:00:00 2001 From: zhaoyang Date: Mon, 26 Aug 2019 04:07:37 +0100 Subject: ARM: 8901/1: add a criteria for pfn_valid of arm pfn_valid can be wrong when parsing an invalid pfn whose phys address exceeds BITS_PER_LONG as the MSB will be trimmed when shifted. The issue originally arose from the call stack below, which corresponds to an access of the /proc/kpageflags from userspace with an invalid pfn parameter and leads to kernel panic. [46886.723249] c7 [] (stable_page_flags) from [] [46886.723264] c7 [] (kpageflags_read) from [] [46886.723280] c7 [] (proc_reg_read) from [] [46886.723290] c7 [] (__vfs_read) from [] [46886.723301] c7 [] (vfs_read) from [] [46886.723315] c7 [] (SyS_pread64) from [] (ret_fast_syscall+0x0/0x28) Signed-off-by: Zhaoyang Huang Signed-off-by: Russell King --- arch/arm/mm/init.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 3a65ded832df..b4be3baa83d4 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -175,6 +175,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max_low, #ifdef CONFIG_HAVE_ARCH_PFN_VALID int pfn_valid(unsigned long pfn) { + phys_addr_t addr = __pfn_to_phys(pfn); + + if (__phys_to_pfn(addr) != pfn) + return 0; + return memblock_is_map_memory(__pfn_to_phys(pfn)); } EXPORT_SYMBOL(pfn_valid); -- cgit v1.2.1 From 36f1031c51a2538e5558fb44c6d6b88f98d3c0f2 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Tue, 27 Aug 2019 11:10:04 -0500 Subject: ibmvnic: Do not process reset during or after device removal Currently, the ibmvnic driver will not schedule device resets if the device is being removed, but does not check the device state before the reset is actually processed. 
This leads to a race where a reset is scheduled with a valid device state but is processed after the driver has been removed, resulting in an oops. Fix this by checking the device state before processing a queued reset event. Reported-by: Abdul Haleem Tested-by: Abdul Haleem Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index cebd20f3128d..fa4bb940665c 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1983,6 +1983,10 @@ static void __ibmvnic_reset(struct work_struct *work) rwi = get_next_rwi(adapter); while (rwi) { + if (adapter->state == VNIC_REMOVING || + adapter->state == VNIC_REMOVED) + goto out; + if (adapter->force_reset_recovery) { adapter->force_reset_recovery = false; rc = do_hard_reset(adapter, rwi, reset_state); @@ -2007,7 +2011,7 @@ static void __ibmvnic_reset(struct work_struct *work) netdev_dbg(adapter->netdev, "Reset failed\n"); free_all_rwi(adapter); } - +out: adapter->resetting = false; if (we_lock_rtnl) rtnl_unlock(); -- cgit v1.2.1 From dbf47a2a094edf58983265e323ca4bdcdb58b5ee Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 27 Aug 2019 21:49:38 +0300 Subject: net: sched: act_sample: fix psample group handling on overwrite Action sample doesn't properly handle psample_group pointer in overwrite case. Following issues need to be fixed: - In tcf_sample_init() function RCU_INIT_POINTER() is used to set s->psample_group, even though we neither setting the pointer to NULL, nor preventing concurrent readers from accessing the pointer in some way. Use rcu_swap_protected() instead to safely reset the pointer. - Old value of s->psample_group is not released or deallocated in any way, which results resource leak. Use psample_group_put() on non-NULL value obtained with rcu_swap_protected(). 
- The function psample_group_put() that released reference to struct psample_group pointed by rcu-pointer s->psample_group doesn't respect rcu grace period when deallocating it. Extend struct psample_group with rcu head and use kfree_rcu when freeing it. Fixes: 5c5670fae430 ("net/sched: Introduce sample tc action") Signed-off-by: Vlad Buslov Signed-off-by: David S. Miller --- include/net/psample.h | 1 + net/psample/psample.c | 2 +- net/sched/act_sample.c | 6 +++++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/net/psample.h b/include/net/psample.h index 37a4df2325b2..6b578ce69cd8 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -11,6 +11,7 @@ struct psample_group { u32 group_num; u32 refcount; u32 seq; + struct rcu_head rcu; }; struct psample_group *psample_group_get(struct net *net, u32 group_num); diff --git a/net/psample/psample.c b/net/psample/psample.c index 841f198ea1a8..66e4b61a350d 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -154,7 +154,7 @@ static void psample_group_destroy(struct psample_group *group) { psample_group_notify(group, PSAMPLE_CMD_DEL_GROUP); list_del(&group->list); - kfree(group); + kfree_rcu(group, rcu); } static struct psample_group * diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 7eff363f9f03..10229124a992 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -102,13 +102,17 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); s->rate = rate; s->psample_group_num = psample_group_num; - RCU_INIT_POINTER(s->psample_group, psample_group); + rcu_swap_protected(s->psample_group, psample_group, + lockdep_is_held(&s->tcf_lock)); if (tb[TCA_SAMPLE_TRUNC_SIZE]) { s->truncate = true; s->trunc_size = nla_get_u32(tb[TCA_SAMPLE_TRUNC_SIZE]); } spin_unlock_bh(&s->tcf_lock); + + if (psample_group) + psample_group_put(psample_group); if (goto_ch) tcf_chain_put_by_act(goto_ch); -- cgit 
v1.2.1 From 888a5c53c0d8be6e98bc85b677f179f77a647873 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 27 Aug 2019 15:09:33 -0400 Subject: tcp: inherit timestamp on mtu probe TCP associates tx timestamp requests with a byte in the bytestream. If merging skbs in tcp_mtu_probe, migrate the tstamp request. Similar to MSG_EOR, do not allow moving a timestamp from any segment in the probe but the last. This to avoid merging multiple timestamps. Tested with the packetdrill script at https://github.com/wdebruij/packetdrill/commits/mtu_probe-1 Link: http://patchwork.ozlabs.org/patch/1143278/#2232897 Fixes: 4ed2d765dfac ("net-timestamp: TCP timestamping") Signed-off-by: Willem de Bruijn Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 979520e46e33..8a645f304e6c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2053,7 +2053,7 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len) if (len <= skb->len) break; - if (unlikely(TCP_SKB_CB(skb)->eor)) + if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb)) return false; len -= skb->len; @@ -2170,6 +2170,7 @@ static int tcp_mtu_probe(struct sock *sk) * we need to propagate it to the new skb. */ TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor; + tcp_skb_collapse_tstamp(nskb, skb); tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); } else { -- cgit v1.2.1 From 092e22e586236bba106a82113826a68080a03506 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 27 Aug 2019 23:18:53 +0200 Subject: net/sched: pfifo_fast: fix wrong dereference in pfifo_fast_enqueue Now that 'TCQ_F_CPUSTATS' bit can be cleared, depending on the value of 'TCQ_F_NOLOCK' bit in the parent qdisc, we can't assume anymore that per-cpu counters are there in the error path of skb_array_produce(). 
Otherwise, the following splat can be seen: Unable to handle kernel paging request at virtual address 0000600dea430008 Mem abort info: ESR = 0x96000005 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000005 CM = 0, WnR = 0 user pgtable: 64k pages, 48-bit VAs, pgdp = 000000007b97530e [0000600dea430008] pgd=0000000000000000, pud=0000000000000000 Internal error: Oops: 96000005 [#1] SMP [...] pstate: 10000005 (nzcV daif -PAN -UAO) pc : pfifo_fast_enqueue+0x524/0x6e8 lr : pfifo_fast_enqueue+0x46c/0x6e8 sp : ffff800d39376fe0 x29: ffff800d39376fe0 x28: 1ffff001a07d1e40 x27: ffff800d03e8f188 x26: ffff800d03e8f200 x25: 0000000000000062 x24: ffff800d393772f0 x23: 0000000000000000 x22: 0000000000000403 x21: ffff800cca569a00 x20: ffff800d03e8ee00 x19: ffff800cca569a10 x18: 00000000000000bf x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: ffff1001a726edd0 x13: 1fffe4000276a9a4 x12: 0000000000000000 x11: dfff200000000000 x10: ffff800d03e8f1a0 x9 : 0000000000000003 x8 : 0000000000000000 x7 : 00000000f1f1f1f1 x6 : ffff1001a726edea x5 : ffff800cca56a53c x4 : 1ffff001bf9a8003 x3 : 1ffff001bf9a8003 x2 : 1ffff001a07d1dcb x1 : 0000600dea430000 x0 : 0000600dea430008 Process ping (pid: 6067, stack limit = 0x00000000dc0aa557) Call trace: pfifo_fast_enqueue+0x524/0x6e8 htb_enqueue+0x660/0x10e0 [sch_htb] __dev_queue_xmit+0x123c/0x2de0 dev_queue_xmit+0x24/0x30 ip_finish_output2+0xc48/0x1720 ip_finish_output+0x548/0x9d8 ip_output+0x334/0x788 ip_local_out+0x90/0x138 ip_send_skb+0x44/0x1d0 ip_push_pending_frames+0x5c/0x78 raw_sendmsg+0xed8/0x28d0 inet_sendmsg+0xc4/0x5c0 sock_sendmsg+0xac/0x108 __sys_sendto+0x1ac/0x2a0 __arm64_sys_sendto+0xc4/0x138 el0_svc_handler+0x13c/0x298 el0_svc+0x8/0xc Code: f9402e80 d538d081 91002000 8b010000 (885f7c03) Fix this by testing the value of 'TCQ_F_CPUSTATS' bit in 'qdisc->flags', before dereferencing 'qdisc->cpu_qstats'. 
Fixes: 8a53e616de29 ("net: sched: when clearing NOLOCK, clear TCQ_F_CPUSTATS, too") CC: Paolo Abeni CC: Stefano Brivio Reported-by: Li Shuang Signed-off-by: Davide Caratti Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 099797e5409d..137db1cbde85 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -624,8 +624,12 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, err = skb_array_produce(q, skb); - if (unlikely(err)) - return qdisc_drop_cpu(skb, qdisc, to_free); + if (unlikely(err)) { + if (qdisc_is_percpu_stats(qdisc)) + return qdisc_drop_cpu(skb, qdisc, to_free); + else + return qdisc_drop(skb, qdisc, to_free); + } qdisc_update_stats_at_enqueue(qdisc, pkt_len); return NET_XMIT_SUCCESS; -- cgit v1.2.1 From 49d4b14113cae1410eb4654ada5b9583bad971c4 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Wed, 28 Aug 2019 09:51:41 +0800 Subject: Revert "r8152: napi hangup fix after disconnect" This reverts commit 0ee1f4734967af8321ecebaf9c74221ace34f2d5. The commit 0ee1f4734967 ("r8152: napi hangup fix after disconnect") adds a check about RTL8152_UNPLUG to determine if calling napi_disable() is invalid in rtl8152_close(), when rtl8152_disconnect() is called. This avoids using napi_disable() after calling netif_napi_del(). However, commit ffa9fec30ca0 ("r8152: set RTL8152_UNPLUG only for real disconnection") means that RTL8152_UNPLUG is not always set when calling rtl8152_disconnect(). Therefore, I have to revert commit 0ee1f4734967 ("r8152: napi hangup fix after disconnect") first, and submit another patch to fix it. Signed-off-by: Hayes Wang Signed-off-by: David S.
Miller --- drivers/net/usb/r8152.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index eee0f5007ee3..ad3abe26b51b 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -4021,8 +4021,7 @@ static int rtl8152_close(struct net_device *netdev) #ifdef CONFIG_PM_SLEEP unregister_pm_notifier(&tp->pm_notifier); #endif - if (!test_bit(RTL8152_UNPLUG, &tp->flags)) - napi_disable(&tp->napi); + napi_disable(&tp->napi); clear_bit(WORK_ENABLE, &tp->flags); usb_kill_urb(tp->intr_urb); cancel_delayed_work_sync(&tp->schedule); -- cgit v1.2.1 From 973dc6cfc0e2c43ff29ca5645ceaf1ae694ea110 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Wed, 28 Aug 2019 09:51:42 +0800 Subject: r8152: remove calling netif_napi_del Remove unnecessary use of netif_napi_del. This also avoids to call napi_disable() after netif_napi_del(). Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index ad3abe26b51b..04137ac373b0 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -5352,7 +5352,6 @@ static int rtl8152_probe(struct usb_interface *intf, return 0; out1: - netif_napi_del(&tp->napi); usb_set_intfdata(intf, NULL); out: free_netdev(netdev); @@ -5367,7 +5366,6 @@ static void rtl8152_disconnect(struct usb_interface *intf) if (tp) { rtl_set_unplug(tp); - netif_napi_del(&tp->napi); unregister_netdev(tp->netdev); cancel_delayed_work_sync(&tp->hw_phy_work); tp->rtl_ops.unload(tp); -- cgit v1.2.1 From 739d7c5752b255e89ddbb1b0474f3b88ef5cd343 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Tue, 27 Aug 2019 22:56:29 -0700 Subject: nfp: flower: prevent ingress block binds on internal ports Internal port TC offload is implemented through user-space applications (such as OvS) by adding filters at egress via TC clsact qdiscs. 
Indirect block offload support in the NFP driver accepts both ingress qdisc binds and egress binds if the device is an internal port. However, clsact sends bind notification for both ingress and egress block binds which can lead to the driver registering multiple callbacks and receiving multiple notifications of new filters. Fix this by rejecting ingress block bind callbacks when the port is internal and only adding filter callbacks for egress binds. Fixes: 4d12ba42787b ("nfp: flower: allow offloading of matches on 'internal' ports") Signed-off-by: John Hurley Reviewed-by: Jakub Kicinski Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/offload.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 9917d64694c6..457bdc60f3ee 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -1409,9 +1409,10 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app, struct nfp_flower_priv *priv = app->priv; struct flow_block_cb *block_cb; - if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS && - !(f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && - nfp_flower_internal_port_can_offload(app, netdev))) + if ((f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS && + !nfp_flower_internal_port_can_offload(app, netdev)) || + (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && + nfp_flower_internal_port_can_offload(app, netdev))) return -EOPNOTSUPP; switch (f->command) { -- cgit v1.2.1 From e8024cb483abb2b0290b3ef5e34c736e9de2492f Mon Sep 17 00:00:00 2001 From: John Hurley Date: Tue, 27 Aug 2019 22:56:30 -0700 Subject: nfp: flower: handle neighbour events on internal ports Recent code changes to NFP allowed the offload of neighbour entries to FW when the next hop device was an 
internal port. This allows for offload of tunnel encap when the end-point IP address is applied to such a port. Unfortunately, the neighbour event handler still rejects events that are not associated with a repr dev and so the firmware neighbour table may get out of sync for internal ports. Fix this by allowing internal port neighbour events to be correctly processed. Fixes: 45756dfedab5 ("nfp: flower: allow tunnels to output to internal port") Signed-off-by: John Hurley Reviewed-by: Simon Horman Reviewed-by: Jakub Kicinski Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index a7a80f4b722a..f0ee982eb1b5 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -328,13 +328,13 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, flow.daddr = *(__be32 *)n->primary_key; - /* Only concerned with route changes for representors. */ - if (!nfp_netdev_is_nfp_repr(n->dev)) - return NOTIFY_DONE; - app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb); app = app_priv->app; + if (!nfp_netdev_is_nfp_repr(n->dev) && + !nfp_flower_internal_port_can_offload(app, n->dev)) + return NOTIFY_DONE; + /* Only concerned with changes to routes already added to NFP. */ if (!nfp_tun_has_route(app, flow.daddr)) return NOTIFY_DONE; -- cgit v1.2.1 From 189308d5823a089b56e2299cd96589507dac7319 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 28 Aug 2019 08:31:19 +0200 Subject: sky2: Disable MSI on yet another ASUS boards (P6Xxxx) A similar workaround for the suspend/resume problem is needed for yet another ASUS machines, P6X models. 
Like the previous fix, the BIOS doesn't provide the standard DMI_SYS_* entry, so again DMI_BOARD_* entries are used instead. Reported-and-tested-by: SteveM Signed-off-by: Takashi Iwai Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/sky2.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index a01c75ede871..e0363870f3a5 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4931,6 +4931,13 @@ static const struct dmi_system_id msi_blacklist[] = { DMI_MATCH(DMI_BOARD_NAME, "P6T"), }, }, + { + .ident = "ASUS P6X", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "P6X"), + }, + }, {} }; -- cgit v1.2.1 From dc9cfd2692225a2164f4f20b7deaf38ca8645de3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 13 Aug 2019 16:03:13 -0700 Subject: mtd: hyperbus: fix dependency and build error lib/devres.c, which implements devm_ioremap_resource(), is only built when CONFIG_HAS_IOMEM is set/enabled, so MTD_HYPERBUS should depend on HAS_IOMEM. Fixes a build error and a Kconfig warning (as seen on UML builds): WARNING: unmet direct dependencies detected for MTD_COMPLEX_MAPPINGS Depends on [n]: MTD [=m] && HAS_IOMEM [=n] Selected by [m]: - MTD_HYPERBUS [=m] && MTD [=m] ERROR: "devm_ioremap_resource" [drivers/mtd/hyperbus/hyperbus-core.ko] undefined! 
Fixes: dcc7d3446a0f ("mtd: Add support for HyperBus memory devices") Signed-off-by: Randy Dunlap Cc: Vignesh Raghavendra Cc: Miquel Raynal Cc: Geert Uytterhoeven Cc: linux-mtd@lists.infradead.org Acked-by: Vignesh Raghavendra Signed-off-by: Miquel Raynal --- drivers/mtd/hyperbus/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/hyperbus/Kconfig b/drivers/mtd/hyperbus/Kconfig index b4e3caf7d799..a4d8968d133d 100644 --- a/drivers/mtd/hyperbus/Kconfig +++ b/drivers/mtd/hyperbus/Kconfig @@ -1,5 +1,6 @@ menuconfig MTD_HYPERBUS tristate "HyperBus support" + depends on HAS_IOMEM select MTD_CFI select MTD_MAP_BANK_WIDTH_2 select MTD_CFI_AMDSTD -- cgit v1.2.1 From de20900fbe1c4fd36de25a7a5a43223254ecf0d0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Aug 2019 21:23:45 +0200 Subject: netfilter: nf_flow_table: clear skb tstamp before xmit If 'fq' qdisc is used and a program has requested timestamps, skb->tstamp needs to be cleared, else fq will treat these as 'transmit time'. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_ip.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index d68c801dd614..b9e7dd6e60ce 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -228,7 +228,6 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, { skb_orphan(skb); skb_dst_set_noref(skb, dst); - skb->tstamp = 0; dst_output(state->net, state->sk, skb); return NF_STOLEN; } @@ -284,6 +283,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; iph = ip_hdr(skb); ip_decrease_ttl(iph); + skb->tstamp = 0; if (unlikely(dst_xfrm(&rt->dst))) { memset(skb->cb, 0, sizeof(struct inet_skb_parm)); @@ -512,6 +512,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; ip6h = ipv6_hdr(skb); ip6h->hop_limit--; + skb->tstamp = 0; if (unlikely(dst_xfrm(&rt->dst))) { memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); -- cgit v1.2.1 From b9500577d361522a3d9f14da8cf41dc1d824904e Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 21 Aug 2019 20:17:32 +0300 Subject: iwlwifi: pcie: handle switching killer Qu B0 NICs to C0 We need to use a different firmware for C0 versions of killer Qu NICs. Add structures for them and handle them in the if block that detects C0 revisions. Additionally, instead of having an inclusive check for QnJ devices, make the selection exclusive, so that switching to QnJ is the exception, not the default. This prevents us from having to add all the non-QnJ cards to an exclusion list. To do so, only go into the QnJ block if the device has an RF ID type HR and HW revision QnJ. 
Cc: stable@vger.kernel.org # 5.2 Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20190821171732.2266-1-luca@coelho.fi Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 24 ++++++++++++++++++++++++ drivers/net/wireless/intel/iwlwifi/iwl-config.h | 2 ++ drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 4 ++++ drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 7 +------ 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index 1f500cddb3a7..55b713255b8e 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -556,6 +556,30 @@ const struct iwl_cfg killer1650i_2ax_cfg_qu_b0_hr_b0 = { .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, }; +const struct iwl_cfg killer1650s_2ax_cfg_qu_c0_hr_b0 = { + .name = "Killer(R) Wi-Fi 6 AX1650i 160MHz Wireless Network Adapter (201NGW)", + .fw_name_pre = IWL_QU_C_HR_B_FW_PRE, + IWL_DEVICE_22500, + /* + * This device doesn't support receiving BlockAck with a large bitmap + * so we need to restrict the size of transmitted aggregation to the + * HT size; mac80211 would otherwise pick the HE max (256) by default. + */ + .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, +}; + +const struct iwl_cfg killer1650i_2ax_cfg_qu_c0_hr_b0 = { + .name = "Killer(R) Wi-Fi 6 AX1650s 160MHz Wireless Network Adapter (201D2W)", + .fw_name_pre = IWL_QU_C_HR_B_FW_PRE, + IWL_DEVICE_22500, + /* + * This device doesn't support receiving BlockAck with a large bitmap + * so we need to restrict the size of transmitted aggregation to the + * HT size; mac80211 would otherwise pick the HE max (256) by default. 
+ */ + .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, +}; + const struct iwl_cfg iwl22000_2ax_cfg_jf = { .name = "Intel(R) Dual Band Wireless AX 22000", .fw_name_pre = IWL_QU_B_JF_B_FW_PRE, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 1c1bf1b281cd..6c04f8223aff 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -577,6 +577,8 @@ extern const struct iwl_cfg iwl_ax1650i_cfg_quz_hr; extern const struct iwl_cfg iwl_ax1650s_cfg_quz_hr; extern const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0; extern const struct iwl_cfg killer1650i_2ax_cfg_qu_b0_hr_b0; +extern const struct iwl_cfg killer1650s_2ax_cfg_qu_c0_hr_b0; +extern const struct iwl_cfg killer1650i_2ax_cfg_qu_c0_hr_b0; extern const struct iwl_cfg killer1650x_2ax_cfg; extern const struct iwl_cfg killer1650w_2ax_cfg; extern const struct iwl_cfg iwl9461_2ac_cfg_qu_b0_jf_b0; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 7c5aaeaf7fe5..d9ed53b7c768 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1062,6 +1062,10 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) iwl_trans->cfg = &iwl9560_2ac_cfg_qu_c0_jf_b0; else if (iwl_trans->cfg == &iwl9560_2ac_160_cfg_qu_b0_jf_b0) iwl_trans->cfg = &iwl9560_2ac_160_cfg_qu_c0_jf_b0; + else if (iwl_trans->cfg == &killer1650s_2ax_cfg_qu_b0_hr_b0) + iwl_trans->cfg = &killer1650s_2ax_cfg_qu_c0_hr_b0; + else if (iwl_trans->cfg == &killer1650i_2ax_cfg_qu_b0_hr_b0) + iwl_trans->cfg = &killer1650i_2ax_cfg_qu_c0_hr_b0; } /* same thing for QuZ... 
*/ diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 935e35dafce5..db62c8314603 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3602,12 +3602,7 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, } } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR) && - ((trans->cfg != &iwl_ax200_cfg_cc && - trans->cfg != &iwl_ax201_cfg_qu_hr && - trans->cfg != &killer1650x_2ax_cfg && - trans->cfg != &killer1650w_2ax_cfg && - trans->cfg != &iwl_ax201_cfg_quz_hr) || - trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0)) { + trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0) { u32 hw_status; hw_status = iwl_read_prph(trans, UMAG_GEN_HW_STATUS); -- cgit v1.2.1 From c8a41c6afa27b8c3f61622dfd882b912da9d6721 Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Tue, 27 Aug 2019 17:41:19 -0500 Subject: mac80211: Don't memset RXCB prior to PAE intercept ieee80211_deliver_skb_to_local_stack intercepts EAPoL frames if mac80211 is configured to do so and forwards the contents over nl80211. During this process some additional data is also forwarded, including whether the frame was received encrypted or not. Unfortunately just prior to the call to ieee80211_deliver_skb_to_local_stack, skb->cb is cleared, resulting in incorrect data being exposed over nl80211.
Fixes: 018f6fbf540d ("mac80211: Send control port frames over nl80211") Cc: stable@vger.kernel.org Signed-off-by: Denis Kenzior Link: https://lore.kernel.org/r/20190827224120.14545-2-denkenz@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3c1ab870fefe..7c4aeac006fb 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2452,6 +2452,8 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb, cfg80211_rx_control_port(dev, skb, noencrypt); dev_kfree_skb(skb); } else { + memset(skb->cb, 0, sizeof(skb->cb)); + /* deliver to local stack */ if (rx->napi) napi_gro_receive(rx->napi, skb); @@ -2546,8 +2548,6 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) if (skb) { skb->protocol = eth_type_trans(skb, dev); - memset(skb->cb, 0, sizeof(skb->cb)); - ieee80211_deliver_skb_to_local_stack(skb, rx); } -- cgit v1.2.1 From f8b43c5cf4b62a19f2210a0f5367b84e1eff1ab9 Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Tue, 27 Aug 2019 17:41:20 -0500 Subject: mac80211: Correctly set noencrypt for PAE frames The noencrypt flag was intended to be set if the "frame was received unencrypted" according to include/uapi/linux/nl80211.h. However, the current behavior is opposite of this. 
Cc: stable@vger.kernel.org Fixes: 018f6fbf540d ("mac80211: Send control port frames over nl80211") Signed-off-by: Denis Kenzior Link: https://lore.kernel.org/r/20190827224120.14545-3-denkenz@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7c4aeac006fb..768d14c9a716 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2447,7 +2447,7 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb, skb->protocol == cpu_to_be16(ETH_P_PREAUTH)) && sdata->control_port_over_nl80211)) { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - bool noencrypt = status->flag & RX_FLAG_DECRYPTED; + bool noencrypt = !(status->flag & RX_FLAG_DECRYPTED); cfg80211_rx_control_port(dev, skb, noencrypt); dev_kfree_skb(skb); -- cgit v1.2.1 From 7a6c9dbb36a415c5901313fc89871fd19f533656 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 23 Aug 2019 11:03:52 +0200 Subject: soc: ixp4xx: Protect IXP4xx SoC drivers by ARCH_IXP4XX || COMPILE_TEST The move of the IXP4xx SoC drivers exposed their config options on all platforms. Fix this by wrapping them inside an ARCH_IXP4XX or COMPILE_TEST block. Link: https://lore.kernel.org/r/20190823090352.12243-1-linus.walleij@linaro.org Fixes: fcf2d8978cd538a5 ("ARM: ixp4xx: Move NPE and QMGR to drivers/soc") Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Walleij Signed-off-by: Arnd Bergmann --- drivers/soc/ixp4xx/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/soc/ixp4xx/Kconfig b/drivers/soc/ixp4xx/Kconfig index de2e62c3310a..e3eb19b85fa4 100644 --- a/drivers/soc/ixp4xx/Kconfig +++ b/drivers/soc/ixp4xx/Kconfig @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only +if ARCH_IXP4XX || COMPILE_TEST + menu "IXP4xx SoC drivers" config IXP4XX_QMGR @@ -15,3 +17,5 @@ config IXP4XX_NPE and is automatically selected by Ethernet and HSS drivers. 
endmenu + +endif -- cgit v1.2.1 From 00a0c8451abcea98a7ea4abf583a308a6513d8ba Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 20 Aug 2019 20:26:44 -0500 Subject: ARC: unwind: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. This patch fixes the following warnings (Building: haps_hs_defconfig arc): arch/arc/kernel/unwind.c: In function ‘read_pointer’: ./include/linux/compiler.h:328:5: warning: this statement may fall through [-Wimplicit-fallthrough=] do { \ ^ ./include/linux/compiler.h:338:2: note: in expansion of macro ‘__compiletime_assert’ __compiletime_assert(condition, msg, prefix, suffix) ^~~~~~~~~~~~~~~~~~~~ ./include/linux/compiler.h:350:2: note: in expansion of macro ‘_compiletime_assert’ _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) ^~~~~~~~~~~~~~~~~~~ ./include/linux/build_bug.h:39:37: note: in expansion of macro ‘compiletime_assert’ #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) ^~~~~~~~~~~~~~~~~~ ./include/linux/build_bug.h:50:2: note: in expansion of macro ‘BUILD_BUG_ON_MSG’ BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition) ^~~~~~~~~~~~~~~~ arch/arc/kernel/unwind.c:573:3: note: in expansion of macro ‘BUILD_BUG_ON’ BUILD_BUG_ON(sizeof(u32) != sizeof(value)); ^~~~~~~~~~~~ arch/arc/kernel/unwind.c:575:2: note: here case DW_EH_PE_native: ^~~~ Signed-off-by: Gustavo A. R. 
Silva --- arch/arc/kernel/unwind.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index c2663fce7f6c..725adfcdd116 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -572,6 +572,7 @@ static unsigned long read_pointer(const u8 **pLoc, const void *end, #else BUILD_BUG_ON(sizeof(u32) != sizeof(value)); #endif + /* Fall through */ case DW_EH_PE_native: if (end < (const void *)(ptr.pul + 1)) return 0; -- cgit v1.2.1 From 7c9eb2dbd770b7c9980d5839dd305a70fbc5df67 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 27 Aug 2019 11:57:07 -0500 Subject: nds32: Mark expected switch fall-throughs Mark switch cases where we are expecting to fall through. This patch fixes the following warnings (Building: allmodconfig nds32): include/math-emu/soft-fp.h:124:8: warning: this statement may fall through [-Wimplicit-fallthrough=] arch/nds32/kernel/signal.c:362:20: warning: this statement may fall through [-Wimplicit-fallthrough=] arch/nds32/kernel/signal.c:315:7: warning: this statement may fall through [-Wimplicit-fallthrough=] include/math-emu/op-common.h:417:11: warning: this statement may fall through [-Wimplicit-fallthrough=] include/math-emu/op-common.h:430:11: warning: this statement may fall through [-Wimplicit-fallthrough=] include/math-emu/op-common.h:310:11: warning: this statement may fall through [-Wimplicit-fallthrough=] include/math-emu/op-common.h:320:11: warning: this statement may fall through [-Wimplicit-fallthrough=] include/math-emu/op-common.h:310:11: warning: this statement may fall through [-Wimplicit-fallthrough=] include/math-emu/op-common.h:320:11: warning: this statement may fall through [-Wimplicit-fallthrough=] include/math-emu/soft-fp.h:124:8: warning: this statement may fall through [-Wimplicit-fallthrough=] include/math-emu/op-common.h:417:11: warning: this statement may fall through [-Wimplicit-fallthrough=] include/math-emu/op-common.h:430:11: warning: this 
statement may fall through [-Wimplicit-fallthrough=] include/math-emu/op-common.h:310:11: warning: this statement may fall through [-Wimplicit-fallthrough=] include/math-emu/op-common.h:320:11: warning: this statement may fall through [-Wimplicit-fallthrough=] include/math-emu/op-common.h:310:11: warning: this statement may fall through [-Wimplicit-fallthrough=] include/math-emu/op-common.h:320:11: warning: this statement may fall through [-Wimplicit-fallthrough=] Reported-by: Michael Ellerman Signed-off-by: Gustavo A. R. Silva --- arch/nds32/kernel/signal.c | 2 ++ include/math-emu/op-common.h | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/arch/nds32/kernel/signal.c b/arch/nds32/kernel/signal.c index fe61513982b4..330b19fcd990 100644 --- a/arch/nds32/kernel/signal.c +++ b/arch/nds32/kernel/signal.c @@ -316,6 +316,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) regs->uregs[0] = -EINTR; break; } + /* Else, fall through */ case -ERESTARTNOINTR: regs->uregs[0] = regs->orig_r0; regs->ipc -= 4; @@ -360,6 +361,7 @@ static void do_signal(struct pt_regs *regs) switch (regs->uregs[0]) { case -ERESTART_RESTARTBLOCK: regs->uregs[15] = __NR_restart_syscall; + /* Fall through */ case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: diff --git a/include/math-emu/op-common.h b/include/math-emu/op-common.h index f37d12877754..adcc6a97db61 100644 --- a/include/math-emu/op-common.h +++ b/include/math-emu/op-common.h @@ -308,6 +308,7 @@ do { \ \ case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO): \ R##_e = X##_e; \ + /* Fall through */ \ case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL): \ case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF): \ case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO): \ @@ -318,6 +319,7 @@ do { \ \ case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL): \ R##_e = Y##_e; \ + /* Fall through */ \ case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN): \ case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN): \ case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN): \ @@ 
-415,6 +417,7 @@ do { \ case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF): \ case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO): \ R##_s = X##_s; \ + /* Fall through */ \ \ case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF): \ case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL): \ @@ -428,6 +431,7 @@ do { \ case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN): \ case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN): \ R##_s = Y##_s; \ + /* Fall through */ \ \ case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF): \ case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO): \ @@ -493,6 +497,7 @@ do { \ \ case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO): \ FP_SET_EXCEPTION(FP_EX_DIVZERO); \ + /* Fall through */ \ case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO): \ case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL): \ R##_c = FP_CLS_INF; \ -- cgit v1.2.1 From 689f535843ac2633b395cfc494446326d03efab6 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 19 Aug 2019 22:48:25 +0200 Subject: i2c: make i2c_unregister_device() ERR_PTR safe We are moving towards returning ERR_PTRs when i2c_new_*_device() calls fail. Make sure its counterpart for unregistering handles ERR_PTRs as well. 
Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index f26ed495d384..9c440fa6a3dd 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -832,7 +832,7 @@ EXPORT_SYMBOL_GPL(i2c_new_device); */ void i2c_unregister_device(struct i2c_client *client) { - if (!client) + if (IS_ERR_OR_NULL(client)) return; if (client->dev.of_node) { -- cgit v1.2.1 From 01641b266da33e2cc57b4ea1767ba3e24ce0846b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 16 Aug 2019 16:17:05 +0300 Subject: i2c: i801: Avoid memory leak in check_acpi_smo88xx_device() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit check_acpi_smo88xx_device() utilizes acpi_get_object_info() which in its turn allocates a buffer. User is responsible to clean allocated resources. The last has been missed in the original code. Fix it here. While here, replace !ACPI_SUCCESS() with ACPI_FAILURE(). 
Fixes: 19b07cb4a187 ("i2c: i801: Register optional lis3lv02d I2C device on Dell machines") Signed-off-by: Andy Shevchenko Reviewed-by: Pali Rohár Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index f2956936c3f2..2e08b4722dc4 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1194,19 +1194,28 @@ static acpi_status check_acpi_smo88xx_device(acpi_handle obj_handle, int i; status = acpi_get_object_info(obj_handle, &info); - if (!ACPI_SUCCESS(status) || !(info->valid & ACPI_VALID_HID)) + if (ACPI_FAILURE(status)) return AE_OK; + if (!(info->valid & ACPI_VALID_HID)) + goto smo88xx_not_found; + hid = info->hardware_id.string; if (!hid) - return AE_OK; + goto smo88xx_not_found; i = match_string(acpi_smo8800_ids, ARRAY_SIZE(acpi_smo8800_ids), hid); if (i < 0) - return AE_OK; + goto smo88xx_not_found; + + kfree(info); *((bool *)return_value) = true; return AE_CTRL_TERMINATE; + +smo88xx_not_found: + kfree(info); + return AE_OK; } static bool is_dell_system_with_lis3lv02d(void) -- cgit v1.2.1 From c486dcd2f1bbdd524a1e0149734b79e4ae329650 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 15 Aug 2019 16:52:11 +0300 Subject: i2c: designware: Synchronize IRQs when unregistering slave client Make sure interrupt handler i2c_dw_irq_handler_slave() has finished before clearing the dev->slave pointer in i2c_dw_unreg_slave(). There is a possibility of a race if i2c_dw_irq_handler_slave() is running on another CPU while clearing the dev->slave pointer.
Reported-by: Krzysztof Adamski Reported-by: Wolfram Sang Signed-off-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-slave.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index e7f9305b2dd9..f5f001738df5 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -94,6 +94,7 @@ static int i2c_dw_unreg_slave(struct i2c_client *slave) dev->disable_int(dev); dev->disable(dev); + synchronize_irq(dev->irq); dev->slave = NULL; pm_runtime_put(dev->dev); -- cgit v1.2.1 From 7af0145067bc429a09ac4047b167c0971c9f0dc7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 29 Aug 2019 00:31:34 +0200 Subject: x86/mm/cpa: Prevent large page split when ftrace flips RW on kernel text ftrace does not use text_poke() for enabling trace functionality. It uses its own mechanism and flips the whole kernel text to RW and back to RO. The CPA rework removed a loop based check of 4k pages which tried to preserve a large page by checking each 4k page whether the change would actually cover all pages in the large page. This resulted in endless loops for nothing as in testing it turned out that it actually never preserved anything. Of course testing missed to include ftrace, which is the one and only case which benefitted from the 4k loop. As a consequence enabling function tracing or ftrace based kprobes results in a full 4k split of the kernel text, which affects iTLB performance. The kernel RO protection is the only valid case where this can actually preserve large pages. All other static protections (RO data, data NX, PCI, BIOS) are truly static. So a conflict with those protections which results in a split should only ever happen when a change of memory next to a protected region is attempted. But these conflicts are rightfully splitting the large page to preserve the protected regions. 
In fact a change to the protected regions itself is a bug and is warned about. Add an exception for the static protection check for kernel text RO when the to-be-changed region spans a full large page, which allows the large mappings to be preserved. This also prevents the syslog from being spammed about CPA violations when ftrace is used. The exception needs to be removed once ftrace has switched over to text_poke(), which avoids the whole issue. Fixes: 585948f4f695 ("x86/mm/cpa: Avoid the 4k pages check completely") Reported-by: Song Liu Signed-off-by: Thomas Gleixner Tested-by: Song Liu Reviewed-by: Song Liu Acked-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1908282355340.1938@nanos.tec.linutronix.de --- arch/x86/mm/pageattr.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 6a9a77a403c9..e14e95ea7338 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -516,7 +516,7 @@ static inline void check_conflict(int warnlvl, pgprot_t prot, pgprotval_t val, */ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start, unsigned long pfn, unsigned long npg, - int warnlvl) + unsigned long lpsize, int warnlvl) { pgprotval_t forbidden, res; unsigned long end; @@ -535,9 +535,17 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start, check_conflict(warnlvl, prot, res, start, end, pfn, "Text NX"); forbidden = res; - res = protect_kernel_text_ro(start, end); - check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO"); - forbidden |= res; + /* + * Special case to preserve a large page. If the change spawns the + * full large page mapping then there is no point to split it + * up. Happens with ftrace and is going to be removed once ftrace + * switched to text_poke().
+ */ + if (lpsize != (npg * PAGE_SIZE) || (start & (lpsize - 1))) { + res = protect_kernel_text_ro(start, end); + check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO"); + forbidden |= res; + } /* Check the PFN directly */ res = protect_pci_bios(pfn, pfn + npg - 1); @@ -819,7 +827,7 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address, * extra conditional required here. */ chk_prot = static_protections(old_prot, lpaddr, old_pfn, numpages, - CPA_CONFLICT); + psize, CPA_CONFLICT); if (WARN_ON_ONCE(pgprot_val(chk_prot) != pgprot_val(old_prot))) { /* @@ -855,7 +863,7 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address, * protection requirement in the large page. */ new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages, - CPA_DETECT); + psize, CPA_DETECT); /* * If there is a conflict, split the large page. @@ -906,7 +914,8 @@ static void split_set_pte(struct cpa_data *cpa, pte_t *pte, unsigned long pfn, if (!cpa->force_static_prot) goto set; - prot = static_protections(ref_prot, address, pfn, npg, CPA_PROTECT); + /* Hand in lpsize = 0 to enforce the protection mechanism */ + prot = static_protections(ref_prot, address, pfn, npg, 0, CPA_PROTECT); if (pgprot_val(prot) == pgprot_val(ref_prot)) goto set; @@ -1503,7 +1512,8 @@ repeat: pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); cpa_inc_4k_install(); - new_prot = static_protections(new_prot, address, pfn, 1, + /* Hand in lpsize = 0 to enforce the protection mechanism */ + new_prot = static_protections(new_prot, address, pfn, 1, 0, CPA_PROTECT); new_prot = pgprot_clear_protnone_bits(new_prot); -- cgit v1.2.1 From c7c06a1532f3fe106687ac82a13492c6a619ff1c Mon Sep 17 00:00:00 2001 From: Andrew Cooks Date: Fri, 2 Aug 2019 14:52:46 +0200 Subject: i2c: piix4: Fix port selection for AMD Family 16h Model 30h Family 16h Model 30h SMBus controller needs the same port selection fix as described and fixed in commit 0fe16195f891 ("i2c: piix4: Fix SMBus port selection 
for AMD Family 17h chips") commit 6befa3fde65f ("i2c: piix4: Support alternative port selection register") also fixed the port selection for Hudson2, but unfortunately this is not the exact same device and the AMD naming and PCI Device IDs aren't particularly helpful here. The SMBus port selection register is common to the following Families and models, as documented in AMD's publicly available BIOS and Kernel Developer Guides: 50742 - Family 15h Model 60h-6Fh (PCI_DEVICE_ID_AMD_KERNCZ_SMBUS) 55072 - Family 15h Model 70h-7Fh (PCI_DEVICE_ID_AMD_KERNCZ_SMBUS) 52740 - Family 16h Model 30h-3Fh (PCI_DEVICE_ID_AMD_HUDSON2_SMBUS) The Hudson2 PCI Device ID (PCI_DEVICE_ID_AMD_HUDSON2_SMBUS) is shared between Bolton FCH and Family 16h Model 30h, but the location of the SmBus0Sel port selection bits are different: 51192 - Bolton Register Reference Guide We distinguish between Bolton and Family 16h Model 30h using the PCI Revision ID: Bolton is device 0x780b, revision 0x15 Family 16h Model 30h is device 0x780b, revision 0x1F Family 15h Model 60h and 70h are both device 0x790b, revision 0x4A. 
The following additional public AMD BKDG documents were checked and do not share the same port selection register: 42301 - Family 15h Model 00h-0Fh doesn't mention any 42300 - Family 15h Model 10h-1Fh doesn't mention any 49125 - Family 15h Model 30h-3Fh doesn't mention any 48751 - Family 16h Model 00h-0Fh uses the previously supported index register SB800_PIIX4_PORT_IDX_ALT at 0x2e Signed-off-by: Andrew Cooks Signed-off-by: Jean Delvare Cc: stable@vger.kernel.org [v4.6+] Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-piix4.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index c46c4bddc7ca..cba325eb852f 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -91,7 +91,7 @@ #define SB800_PIIX4_PORT_IDX_MASK 0x06 #define SB800_PIIX4_PORT_IDX_SHIFT 1 -/* On kerncz, SmBus0Sel is at bit 20:19 of PMx00 DecodeEn */ +/* On kerncz and Hudson2, SmBus0Sel is at bit 20:19 of PMx00 DecodeEn */ #define SB800_PIIX4_PORT_IDX_KERNCZ 0x02 #define SB800_PIIX4_PORT_IDX_MASK_KERNCZ 0x18 #define SB800_PIIX4_PORT_IDX_SHIFT_KERNCZ 3 @@ -358,18 +358,16 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, /* Find which register is used for port selection */ if (PIIX4_dev->vendor == PCI_VENDOR_ID_AMD || PIIX4_dev->vendor == PCI_VENDOR_ID_HYGON) { - switch (PIIX4_dev->device) { - case PCI_DEVICE_ID_AMD_KERNCZ_SMBUS: + if (PIIX4_dev->device == PCI_DEVICE_ID_AMD_KERNCZ_SMBUS || + (PIIX4_dev->device == PCI_DEVICE_ID_AMD_HUDSON2_SMBUS && + PIIX4_dev->revision >= 0x1F)) { piix4_port_sel_sb800 = SB800_PIIX4_PORT_IDX_KERNCZ; piix4_port_mask_sb800 = SB800_PIIX4_PORT_IDX_MASK_KERNCZ; piix4_port_shift_sb800 = SB800_PIIX4_PORT_IDX_SHIFT_KERNCZ; - break; - case PCI_DEVICE_ID_AMD_HUDSON2_SMBUS: - default: + } else { piix4_port_sel_sb800 = SB800_PIIX4_PORT_IDX_ALT; piix4_port_mask_sb800 = SB800_PIIX4_PORT_IDX_MASK; piix4_port_shift_sb800 = SB800_PIIX4_PORT_IDX_SHIFT; - 
break; } } else { if (!request_muxed_region(SB800_PIIX4_SMB_IDX, 2, -- cgit v1.2.1 From daf1de9078792a4d60e36aa7ecf3aadca65277c2 Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 28 Aug 2019 23:02:33 +0800 Subject: netfilter: nft_meta_bridge: Fix get NFT_META_BRI_IIFVPROTO in network byteorder Get the vlan_proto of ingress bridge in network byteorder as userspace expects. Otherwise this is inconsistent with NFT_META_PROTOCOL. Fixes: 2a3a93ef0ba5 ("netfilter: nft_meta_bridge: Add NFT_META_BRI_IIFVPROTO support") Signed-off-by: wenxu Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nft_meta_bridge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index 1804e867f715..7c9e92b2f806 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -53,7 +53,7 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr, goto err; br_vlan_get_proto(br_dev, &p_proto); - nft_reg_store16(dest, p_proto); + nft_reg_store16(dest, htons(p_proto)); return; } default: -- cgit v1.2.1 From 72741084d903e65e121c27bd29494d941729d4a1 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 27 Aug 2019 10:10:43 +0200 Subject: mmc: core: Fix init of SD cards reporting an invalid VDD range The OCR register defines the supported range of VDD voltages for SD cards. However, it has turned out that some SD cards reports an invalid voltage range, for example having bit7 set. When a host supports MMC_CAP2_FULL_PWR_CYCLE and some of the voltages from the invalid VDD range, this triggers the core to run a power cycle of the card to try to initialize it at the lowest common supported voltage. Obviously this fails, since the card can't support it. Let's fix this problem, by clearing invalid bits from the read OCR register for SD cards, before proceeding with the VDD voltage negotiation. 
Cc: stable@vger.kernel.org Reported-by: Philip Langdale Signed-off-by: Ulf Hansson Reviewed-by: Philip Langdale Tested-by: Philip Langdale Tested-by: Manuel Presnitz --- drivers/mmc/core/sd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index d681e8aaca83..fe914ff5f5d6 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -1292,6 +1292,12 @@ int mmc_attach_sd(struct mmc_host *host) goto err; } + /* + * Some SD cards claims an out of spec VDD voltage range. Let's treat + * these bits as being in-valid and especially also bit7. + */ + ocr &= ~0x7FFF; + rocr = mmc_select_voltage(host, ocr); /* -- cgit v1.2.1 From efdaf27517a892238e0dfa046cd91184b039d681 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Wed, 28 Aug 2019 10:17:32 +0800 Subject: mmc: sdhci-sprd: fixed incorrect clock divider The register SDHCI_CLOCK_CONTROL should be cleared before config clock divider, otherwise the frequency configured maybe lower than we expected. 
Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host controller") Signed-off-by: Chunyan Zhang Signed-off-by: Chunyan Zhang Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-sprd.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index 83a4767ca680..25f2fc4ce08f 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -217,10 +217,11 @@ static inline void _sdhci_sprd_set_clock(struct sdhci_host *host, struct sdhci_sprd_host *sprd_host = TO_SPRD_HOST(host); u32 div, val, mask; - div = sdhci_sprd_calc_div(sprd_host->base_rate, clk); + sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL); - clk |= ((div & 0x300) >> 2) | ((div & 0xFF) << 8); - sdhci_enable_clk(host, clk); + div = sdhci_sprd_calc_div(sprd_host->base_rate, clk); + div = ((div & 0x300) >> 2) | ((div & 0xFF) << 8); + sdhci_enable_clk(host, div); /* enable auto gate sdhc_enable_auto_gate */ val = sdhci_readl(host, SDHCI_SPRD_REG_32_BUSY_POSI); -- cgit v1.2.1 From 4eae8cbdff942a423926486be4e781a77d619966 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Wed, 28 Aug 2019 10:17:33 +0800 Subject: mmc: sdhci-sprd: add get_ro hook function sprd's sd host controller doesn't support write protect to sd card. 
Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host controller") Signed-off-by: Chunyan Zhang Signed-off-by: Chunyan Zhang Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-sprd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index 25f2fc4ce08f..ddc048e72385 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -374,6 +374,11 @@ static unsigned int sdhci_sprd_get_max_timeout_count(struct sdhci_host *host) return 1 << 31; } +static unsigned int sdhci_sprd_get_ro(struct sdhci_host *host) +{ + return 0; +} + static struct sdhci_ops sdhci_sprd_ops = { .read_l = sdhci_sprd_readl, .write_l = sdhci_sprd_writel, @@ -386,6 +391,7 @@ static struct sdhci_ops sdhci_sprd_ops = { .set_uhs_signaling = sdhci_sprd_set_uhs_signaling, .hw_reset = sdhci_sprd_hw_reset, .get_max_timeout_count = sdhci_sprd_get_max_timeout_count, + .get_ro = sdhci_sprd_get_ro, }; static void sdhci_sprd_request(struct mmc_host *mmc, struct mmc_request *mrq) -- cgit v1.2.1 From 6a526f66ab1494b63c71cd6639d9d96fd7216add Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Wed, 28 Aug 2019 10:17:34 +0800 Subject: mmc: sdhci-sprd: add SDHCI_QUIRK2_PRESET_VALUE_BROKEN The bit of PRESET_VAL_ENABLE in HOST_CONTROL2 register is reserved on sprd's sd host controller, set quirk2 to disable configuring this. 
Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host controller") Signed-off-by: Chunyan Zhang Signed-off-by: Chunyan Zhang Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-sprd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index ddc048e72385..130b75d37966 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -510,7 +510,8 @@ static void sdhci_sprd_phy_param_parse(struct sdhci_sprd_host *sprd_host, static const struct sdhci_pltfm_data sdhci_sprd_pdata = { .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK, .quirks2 = SDHCI_QUIRK2_BROKEN_HS200 | - SDHCI_QUIRK2_USE_32BIT_BLK_CNT, + SDHCI_QUIRK2_USE_32BIT_BLK_CNT | + SDHCI_QUIRK2_PRESET_VALUE_BROKEN, .ops = &sdhci_sprd_ops, }; -- cgit v1.2.1 From 4324e54bbea0107b054336f20075a26939b2bd51 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Wed, 28 Aug 2019 10:17:35 +0800 Subject: mmc: sdhci-sprd: add SDHCI_QUIRK_BROKEN_CARD_DETECTION sprd's sd host controller doesn't support detecting card insertion or removal.
Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host controller") Signed-off-by: Chunyan Zhang Signed-off-by: Chunyan Zhang Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-sprd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index 130b75d37966..ba777f0c77d1 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -508,7 +508,8 @@ static void sdhci_sprd_phy_param_parse(struct sdhci_sprd_host *sprd_host, } static const struct sdhci_pltfm_data sdhci_sprd_pdata = { - .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK, + .quirks = SDHCI_QUIRK_BROKEN_CARD_DETECTION | + SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK, .quirks2 = SDHCI_QUIRK2_BROKEN_HS200 | SDHCI_QUIRK2_USE_32BIT_BLK_CNT | SDHCI_QUIRK2_PRESET_VALUE_BROKEN, -- cgit v1.2.1 From 2f765c175e1d1acae911f889e71e5933c6488929 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Wed, 28 Aug 2019 10:17:36 +0800 Subject: mmc: sdhci-sprd: clear the UHS-I modes read from registers sprd's sd host controller supports SDR50/SDR104/DDR50 though, the UHS-I mode used by the specific card can be selected via devicetree only. 
Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host controller") Signed-off-by: Chunyan Zhang Signed-off-by: Chunyan Zhang Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-sprd.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index ba777f0c77d1..d07b9793380f 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -509,7 +509,8 @@ static void sdhci_sprd_phy_param_parse(struct sdhci_sprd_host *sprd_host, static const struct sdhci_pltfm_data sdhci_sprd_pdata = { .quirks = SDHCI_QUIRK_BROKEN_CARD_DETECTION | - SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK, + SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | + SDHCI_QUIRK_MISSING_CAPS, .quirks2 = SDHCI_QUIRK2_BROKEN_HS200 | SDHCI_QUIRK2_USE_32BIT_BLK_CNT | SDHCI_QUIRK2_PRESET_VALUE_BROKEN, @@ -614,6 +615,16 @@ static int sdhci_sprd_probe(struct platform_device *pdev) sdhci_enable_v4_mode(host); + /* + * Supply the existing CAPS, but clear the UHS-I modes. This + * will allow these modes to be specified only by device + * tree properties through mmc_of_parse(). + */ + host->caps = sdhci_readl(host, SDHCI_CAPABILITIES); + host->caps1 = sdhci_readl(host, SDHCI_CAPABILITIES_1); + host->caps1 &= ~(SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_SDR104 | + SDHCI_SUPPORT_DDR50); + ret = sdhci_setup_host(host); if (ret) goto pm_runtime_disable; -- cgit v1.2.1 From e73a3896eaca95ea5fc895720502a3f040eb4b39 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 29 Aug 2019 19:49:26 +0900 Subject: mmc: sdhci-cadence: enable v4_mode to fix ADMA 64-bit addressing The IP datasheet says this controller is compatible with SD Host Specification Version v4.00. 
As it turned out, the ADMA of this IP does not work with 64-bit mode when it is in the Version 3.00 compatible mode; it understands the old 64-bit descriptor table (as defined in SDHCI v2), but the ADMA System Address Register (SDHCI_ADMA_ADDRESS) cannot point to the 64-bit address. I noticed this issue only after commit bd2e75633c80 ("dma-contiguous: use fallback alloc_pages for single pages"). Prior to that commit, dma_set_mask_and_coherent() returned the dma address that fits in 32-bit range, at least for the default arm64 configuration (arch/arm64/configs/defconfig). Now the host->adma_addr exceeds the 32-bit limit, causing the real problem for the Socionext SoCs. (As a side-note, I was also able to reproduce the issue for older kernels by turning off CONFIG_DMA_CMA.) Call sdhci_enable_v4_mode() to fix this. Cc: # v4.20+ Signed-off-by: Masahiro Yamada Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-cadence.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/sdhci-cadence.c b/drivers/mmc/host/sdhci-cadence.c index 163d1cf4367e..44139fceac24 100644 --- a/drivers/mmc/host/sdhci-cadence.c +++ b/drivers/mmc/host/sdhci-cadence.c @@ -369,6 +369,7 @@ static int sdhci_cdns_probe(struct platform_device *pdev) host->mmc_host_ops.execute_tuning = sdhci_cdns_execute_tuning; host->mmc_host_ops.hs400_enhanced_strobe = sdhci_cdns_hs400_enhanced_strobe; + sdhci_enable_v4_mode(host); sdhci_get_of_property(pdev); -- cgit v1.2.1 From 44d3bbb6f5e501b873218142fe08cdf62a4ac1f3 Mon Sep 17 00:00:00 2001 From: Josh Hunt Date: Mon, 19 Aug 2019 19:13:31 -0400 Subject: perf/x86/intel: Restrict period on Nehalem We see our Nehalem machines reporting 'perfevents: irq loop stuck!' in some cases when using perf: perfevents: irq loop stuck! WARNING: CPU: 0 PID: 3485 at arch/x86/events/intel/core.c:2282 intel_pmu_handle_irq+0x37b/0x530 ... RIP: 0010:intel_pmu_handle_irq+0x37b/0x530 ... Call Trace: ? perf_event_nmi_handler+0x2e/0x50 ? 
intel_pmu_save_and_restart+0x50/0x50 perf_event_nmi_handler+0x2e/0x50 nmi_handle+0x6e/0x120 default_do_nmi+0x3e/0x100 do_nmi+0x102/0x160 end_repeat_nmi+0x16/0x50 ... ? native_write_msr+0x6/0x20 ? native_write_msr+0x6/0x20 intel_pmu_enable_event+0x1ce/0x1f0 x86_pmu_start+0x78/0xa0 x86_pmu_enable+0x252/0x310 __perf_event_task_sched_in+0x181/0x190 ? __switch_to_asm+0x41/0x70 ? __switch_to_asm+0x35/0x70 ? __switch_to_asm+0x41/0x70 ? __switch_to_asm+0x35/0x70 finish_task_switch+0x158/0x260 __schedule+0x2f6/0x840 ? hrtimer_start_range_ns+0x153/0x210 schedule+0x32/0x80 schedule_hrtimeout_range_clock+0x8a/0x100 ? hrtimer_init+0x120/0x120 ep_poll+0x2f7/0x3a0 ? wake_up_q+0x60/0x60 do_epoll_wait+0xa9/0xc0 __x64_sys_epoll_wait+0x1a/0x20 do_syscall_64+0x4e/0x110 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fdeb1e96c03 ... Signed-off-by: Peter Zijlstra (Intel) Cc: acme@kernel.org Cc: Josh Hunt Cc: bpuranda@akamai.com Cc: mingo@redhat.com Cc: jolsa@redhat.com Cc: tglx@linutronix.de Cc: namhyung@kernel.org Cc: alexander.shishkin@linux.intel.com Link: https://lkml.kernel.org/r/1566256411-18820-1-git-send-email-johunt@akamai.com --- arch/x86/events/intel/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 648260b5f367..e4c2cb65ea50 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3572,6 +3572,11 @@ static u64 bdw_limit_period(struct perf_event *event, u64 left) return left; } +static u64 nhm_limit_period(struct perf_event *event, u64 left) +{ + return max(left, 32ULL); +} + PMU_FORMAT_ATTR(event, "config:0-7" ); PMU_FORMAT_ATTR(umask, "config:8-15" ); PMU_FORMAT_ATTR(edge, "config:18" ); @@ -4606,6 +4611,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; x86_pmu.enable_all = intel_pmu_nhm_enable_all; x86_pmu.extra_regs = intel_nehalem_extra_regs; + x86_pmu.limit_period = nhm_limit_period; mem_attr = 
nhm_mem_events_attrs; -- cgit v1.2.1 From 0f4cd769c410e2285a4e9873a684d90423f03090 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 26 Aug 2019 14:57:30 -0500 Subject: perf/x86/amd/ibs: Fix sample bias for dispatched micro-ops When counting dispatched micro-ops with cnt_ctl=1, in order to prevent sample bias, IBS hardware preloads the least significant 7 bits of current count (IbsOpCurCnt) with random values, such that, after the interrupt is handled and counting resumes, the next sample taken will be slightly perturbed. The current count bitfield is in the IBS execution control h/w register, alongside the maximum count field. Currently, the IBS driver writes that register with the maximum count, leaving zeroes to fill the current count field, thereby overwriting the random bits the hardware preloaded for itself. Fix the driver to actually retain and carry those random bits from the read of the IBS control register, through to its write, instead of overwriting the lower current count bits with zeroes. Tested with: perf record -c 100001 -e ibs_op/cnt_ctl=1/pp -a -C 0 taskset -c 0 'perf annotate' output before: 15.70 65: addsd %xmm0,%xmm1 17.30 add $0x1,%rax 15.88 cmp %rdx,%rax je 82 17.32 72: test $0x1,%al jne 7c 7.52 movapd %xmm1,%xmm0 5.90 jmp 65 8.23 7c: sqrtsd %xmm1,%xmm0 12.15 jmp 65 'perf annotate' output after: 16.63 65: addsd %xmm0,%xmm1 16.82 add $0x1,%rax 16.81 cmp %rdx,%rax je 82 16.69 72: test $0x1,%al jne 7c 8.30 movapd %xmm1,%xmm0 8.13 jmp 65 8.24 7c: sqrtsd %xmm1,%xmm0 8.39 jmp 65 Tested on Family 15h and 17h machines. Machines prior to family 10h Rev. C don't have the RDWROPCNT capability, and have the IbsOpCurCnt bitfield reserved, so this patch shouldn't affect their operation. It is unknown why commit db98c5faf8cb ("perf/x86: Implement 64-bit counter support for IBS") ignored the lower 4 bits of the IbsOpCurCnt field; the number of preloaded random bits has always been 7, AFAICT. 
Signed-off-by: Kim Phillips Signed-off-by: Peter Zijlstra (Intel) Cc: "Arnaldo Carvalho de Melo" Cc: Cc: Ingo Molnar Cc: Ingo Molnar Cc: Jiri Olsa Cc: Thomas Gleixner Cc: "Borislav Petkov" Cc: Stephane Eranian Cc: Alexander Shishkin Cc: "Namhyung Kim" Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20190826195730.30614-1-kim.phillips@amd.com --- arch/x86/events/amd/ibs.c | 13 ++++++++++--- arch/x86/include/asm/perf_event.h | 12 ++++++++---- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 62f317c9113a..5b35b7ea5d72 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -661,10 +661,17 @@ fail: throttle = perf_event_overflow(event, &data, &regs); out: - if (throttle) + if (throttle) { perf_ibs_stop(event, 0); - else - perf_ibs_enable_event(perf_ibs, hwc, period >> 4); + } else { + period >>= 4; + + if ((ibs_caps & IBS_CAPS_RDWROPCNT) && + (*config & IBS_OP_CNT_CTL)) + period |= *config & IBS_OP_CUR_CNT_RAND; + + perf_ibs_enable_event(perf_ibs, hwc, period); + } perf_event_update_userpage(event); diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 1392d5e6e8d6..ee26e9215f18 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -252,16 +252,20 @@ struct pebs_lbr { #define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) #define IBSCTL_LVT_OFFSET_MASK 0x0F -/* ibs fetch bits/masks */ +/* IBS fetch bits/masks */ #define IBS_FETCH_RAND_EN (1ULL<<57) #define IBS_FETCH_VAL (1ULL<<49) #define IBS_FETCH_ENABLE (1ULL<<48) #define IBS_FETCH_CNT 0xFFFF0000ULL #define IBS_FETCH_MAX_CNT 0x0000FFFFULL -/* ibs op bits/masks */ -/* lower 4 bits of the current count are ignored: */ -#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32) +/* + * IBS op bits/masks + * The lower 7 bits of the current count are random bits + * preloaded by hardware and ignored in software + */ +#define IBS_OP_CUR_CNT (0xFFF80ULL<<32) +#define IBS_OP_CUR_CNT_RAND
(0x0007FULL<<32) #define IBS_OP_CNT_CTL (1ULL<<19) #define IBS_OP_VAL (1ULL<<18) #define IBS_OP_ENABLE (1ULL<<17) -- cgit v1.2.1 From f08b2080e36057c9833596df8348c33daeb04d56 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Tue, 13 Aug 2019 09:09:13 +0300 Subject: MAINTAINERS: i2c mv64xxx: Update documentation path Update MAINTAINERS record to reflect the file move from i2c-mv64xxx.txt to marvell,mv64xxx-i2c.yaml. Fixes: f8bbde72ef44 ("dt-bindings: i2c: mv64xxx: Add YAML schemas") Signed-off-by: Denis Efremov Acked-by: Maxime Ripard Acked-by: Gregory CLEMENT Signed-off-by: Wolfram Sang --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9cbcf167bdd0..a21de6232ad1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7513,7 +7513,7 @@ I2C MV64XXX MARVELL AND ALLWINNER DRIVER M: Gregory CLEMENT L: linux-i2c@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt +F: Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml F: drivers/i2c/busses/i2c-mv64xxx.c I2C OVER PARALLEL PORT -- cgit v1.2.1 From b3d604d405166edfd4e1e6053409b85008f4f56d Mon Sep 17 00:00:00 2001 From: Lori Hikichi Date: Thu, 8 Aug 2019 09:07:52 +0530 Subject: i2c: iproc: Stop advertising support of SMBUS quick cmd The driver does not support the SMBUS Quick command so remove the flag that indicates that level of support. By default the i2c_detect tool uses the quick command to try and detect devices at some bus addresses. If the quick command is used then we will not detect the device, even though it is present. 
Fixes: e6e5dd3566e0 (i2c: iproc: Add Broadcom iProc I2C Driver) Signed-off-by: Lori Hikichi Signed-off-by: Rayagonda Kokatanur Reviewed-by: Ray Jui Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-bcm-iproc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c index d7fd76baec92..19ef2b0c682a 100644 --- a/drivers/i2c/busses/i2c-bcm-iproc.c +++ b/drivers/i2c/busses/i2c-bcm-iproc.c @@ -790,7 +790,10 @@ static int bcm_iproc_i2c_xfer(struct i2c_adapter *adapter, static uint32_t bcm_iproc_i2c_functionality(struct i2c_adapter *adap) { - u32 val = I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; + u32 val; + + /* We do not support the SMBUS Quick command */ + val = I2C_FUNC_I2C | (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK); if (adap->algo->reg_slave) val |= I2C_FUNC_SLAVE; -- cgit v1.2.1 From abf4923e97c3abbbd1e59f0e13c7c214c93c6aaa Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Thu, 22 Aug 2019 17:45:17 +0800 Subject: i2c: mediatek: disable zero-length transfers for mt8183 Quoting from mt8183 datasheet, the number of transfers to be transferred in one transaction should be set to bigger than 1, so we should forbid zero-length transfer and update functionality. 
Reported-by: Alexandru M Stan Signed-off-by: Hsin-Yi Wang Reviewed-by: Qii Wang [wsa: shortened commit message a little] Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index 252edb433fdf..29eae1bf4f86 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -234,6 +234,10 @@ static const struct i2c_adapter_quirks mt7622_i2c_quirks = { .max_num_msgs = 255, }; +static const struct i2c_adapter_quirks mt8183_i2c_quirks = { + .flags = I2C_AQ_NO_ZERO_LEN, +}; + static const struct mtk_i2c_compatible mt2712_compat = { .regs = mt_i2c_regs_v1, .pmic_i2c = 0, @@ -298,6 +302,7 @@ static const struct mtk_i2c_compatible mt8173_compat = { }; static const struct mtk_i2c_compatible mt8183_compat = { + .quirks = &mt8183_i2c_quirks, .regs = mt_i2c_regs_v2, .pmic_i2c = 0, .dcm = 0, @@ -870,7 +875,11 @@ static irqreturn_t mtk_i2c_irq(int irqno, void *dev_id) static u32 mtk_i2c_functionality(struct i2c_adapter *adap) { - return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; + if (adap->quirks->flags & I2C_AQ_NO_ZERO_LEN) + return I2C_FUNC_I2C | + (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK); + else + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; } static const struct i2c_algorithm mtk_i2c_algorithm = { -- cgit v1.2.1 From 846d2db3e00048da3f650e0cfb0b8d67669cec3e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 30 Aug 2019 16:52:26 +0100 Subject: keys: ensure that ->match_free() is called in request_key_and_link() If check_cached_key() returns a non-NULL value, we still need to call key_type::match_free() to undo key_type::match_preparse(). 
Fixes: 7743c48e54ee ("keys: Cache result of request_key*() temporarily in task_struct") Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- security/keys/request_key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 7325f382dbf4..957b9e3e1492 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -595,7 +595,7 @@ struct key *request_key_and_link(struct key_type *type, key = check_cached_key(&ctx); if (key) - return key; + goto error_free; /* search all the process keyrings for a key */ rcu_read_lock(); -- cgit v1.2.1 From 7bd46644ea0f6021dc396a39a8bfd3a58f6f1f9f Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 4 Jul 2019 20:04:41 +0530 Subject: ftrace: Fix NULL pointer dereference in t_probe_next() LTP testsuite on powerpc results in the below crash: Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xc00000000029d800 Oops: Kernel access of bad area, sig: 11 [#1] LE SMP NR_CPUS=2048 NUMA PowerNV ... CPU: 68 PID: 96584 Comm: cat Kdump: loaded Tainted: G W NIP: c00000000029d800 LR: c00000000029dac4 CTR: c0000000001e6ad0 REGS: c0002017fae8ba10 TRAP: 0300 Tainted: G W MSR: 9000000000009033 CR: 28022422 XER: 20040000 CFAR: c00000000029d90c DAR: 0000000000000000 DSISR: 40000000 IRQMASK: 0 ... NIP [c00000000029d800] t_probe_next+0x60/0x180 LR [c00000000029dac4] t_mod_start+0x1a4/0x1f0 Call Trace: [c0002017fae8bc90] [c000000000cdbc40] _cond_resched+0x10/0xb0 (unreliable) [c0002017fae8bce0] [c0000000002a15b0] t_start+0xf0/0x1c0 [c0002017fae8bd30] [c0000000004ec2b4] seq_read+0x184/0x640 [c0002017fae8bdd0] [c0000000004a57bc] sys_read+0x10c/0x300 [c0002017fae8be30] [c00000000000b388] system_call+0x5c/0x70 The test (ftrace_set_ftrace_filter.sh) is part of ftrace stress tests and the crash happens when the test does 'cat $TRACING_PATH/set_ftrace_filter'. 
The address points to the second line below, in t_probe_next(), where filter_hash is dereferenced: hash = iter->probe->ops.func_hash->filter_hash; size = 1 << hash->size_bits; This happens due to a race with register_ftrace_function_probe(). A new ftrace_func_probe is created and added into the func_probes list in trace_array under ftrace_lock. However, before initializing the filter, we drop ftrace_lock, and re-acquire it after acquiring regex_lock. If another process is trying to read set_ftrace_filter, it will be able to acquire ftrace_lock during this window and it will end up seeing a NULL filter_hash. Fix this by just checking for a NULL filter_hash in t_probe_next(). If the filter_hash is NULL, then this probe is just being added and we can simply return from here. Link: http://lkml.kernel.org/r/05e021f757625cbbb006fad41380323dbe4e3b43.1562249521.git.naveen.n.rao@linux.vnet.ibm.com Cc: stable@vger.kernel.org Fixes: 7b60f3d876156 ("ftrace: Dynamically create the probe ftrace_ops for the trace_array") Signed-off-by: Naveen N. Rao Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index eca34503f178..80beed2cf0da 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3095,6 +3095,10 @@ t_probe_next(struct seq_file *m, loff_t *pos) hnd = &iter->probe_entry->hlist; hash = iter->probe->ops.func_hash->filter_hash; + + if (!hash) + return NULL; + size = 1 << hash->size_bits; retry: -- cgit v1.2.1 From 372e0d01da71c84dcecf7028598a33813b0d5256 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 30 Aug 2019 16:30:01 -0400 Subject: ftrace: Check for empty hash and comment the race with registering probes The race between adding a function probe and reading the probes that exist is very subtle. It needs a comment. The issue can also happen if the probe has the EMPTY_HASH as its func_hash.
Cc: stable@vger.kernel.org Fixes: 7b60f3d876156 ("ftrace: Dynamically create the probe ftrace_ops for the trace_array") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 80beed2cf0da..6200a6fe10e3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3096,7 +3096,11 @@ t_probe_next(struct seq_file *m, loff_t *pos) hash = iter->probe->ops.func_hash->filter_hash; - if (!hash) + /* + * A probe being registered may temporarily have an empty hash + * and it's at the end of the func_probes list. + */ + if (!hash || hash == EMPTY_HASH) return NULL; size = 1 << hash->size_bits; @@ -4324,6 +4328,10 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr, mutex_unlock(&ftrace_lock); + /* + * Note, there's a small window here that the func_hash->filter_hash + * may be NULL or empty. Need to be carefule when reading the loop. + */ mutex_lock(&probe->ops.func_hash->regex_lock); orig_hash = &probe->ops.func_hash->filter_hash; -- cgit v1.2.1 From 5b0022dd32b7c2e15edf1827ba80aa1407edf9ff Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 4 Jul 2019 20:04:42 +0530 Subject: ftrace: Check for successful allocation of hash In register_ftrace_function_probe(), we are not checking the return value of alloc_and_copy_ftrace_hash(). The subsequent call to ftrace_match_records() may end up dereferencing the same. Add a check to ensure this doesn't happen. Link: http://lkml.kernel.org/r/26e92574f25ad23e7cafa3cf5f7a819de1832cbe.1562249521.git.naveen.n.rao@linux.vnet.ibm.com Cc: stable@vger.kernel.org Fixes: 1ec3a81a0cf42 ("ftrace: Have each function probe use its own ftrace_ops") Signed-off-by: Naveen N. 
Rao Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6200a6fe10e3..f9821a3374e9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4338,6 +4338,11 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr, old_hash = *orig_hash; hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash); + if (!hash) { + ret = -ENOMEM; + goto out; + } + ret = ftrace_match_records(hash, glob, strlen(glob)); /* Nothing found? */ -- cgit v1.2.1 From b6b4dc4c1fa7f1c99398e7dc85758049645e9588 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 29 Aug 2019 10:46:00 +0800 Subject: amd-xgbe: Fix error path in xgbe_mod_init() In xgbe_mod_init(), we should do cleanup if some error occurs Reported-by: Hulk Robot Fixes: efbaa828330a ("amd-xgbe: Add support to handle device renaming") Fixes: 47f164deab22 ("amd-xgbe: Add PCI device support") Signed-off-by: YueHaibing Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/xgbe/xgbe-main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c index b41f23679a08..7ce9c69e9c44 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c @@ -469,13 +469,19 @@ static int __init xgbe_mod_init(void) ret = xgbe_platform_init(); if (ret) - return ret; + goto err_platform_init; ret = xgbe_pci_init(); if (ret) - return ret; + goto err_pci_init; return 0; + +err_pci_init: + xgbe_platform_exit(); +err_platform_init: + unregister_netdevice_notifier(&xgbe_netdev_notifier); + return ret; } static void __exit xgbe_mod_exit(void) -- cgit v1.2.1 From 3b25528e1e355c803e73aa326ce657b5606cda73 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Thu, 29 Aug 2019 11:17:24 +0800 Subject: net: stmmac: dwmac-rk: Don't fail if phy regulator is absent The devicetree binding lists the phy phy as optional. As such, the driver should not bail out if it can't find a regulator. Instead it should just skip the remaining regulator related code and continue on normally. Skip the remainder of phy_power_on() if a regulator supply isn't available. This also gets rid of the bogus return code. Fixes: 2e12f536635f ("net: stmmac: dwmac-rk: Use standard devicetree property for phy regulator") Signed-off-by: Chen-Yu Tsai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 4644b2aeeba1..e2e469c37a4d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1194,10 +1194,8 @@ static int phy_power_on(struct rk_priv_data *bsp_priv, bool enable) int ret; struct device *dev = &bsp_priv->pdev->dev; - if (!ldo) { - dev_err(dev, "no regulator found\n"); - return -1; - } + if (!ldo) + return 0; if (enable) { ret = regulator_enable(ldo); -- cgit v1.2.1 From d12040b6933f684a26773afad46dbba9778608d7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Aug 2019 14:12:11 +0100 Subject: rxrpc: Fix lack of conn cleanup when local endpoint is cleaned up [ver #2] When a local endpoint is ceases to be in use, such as when the kafs module is unloaded, the kernel will emit an assertion failure if there are any outstanding client connections: rxrpc: Assertion failed ------------[ cut here ]------------ kernel BUG at net/rxrpc/local_object.c:433! and even beyond that, will evince other oopses if there are service connections still present. Fix this by: (1) Removing the triggering of connection reaping when an rxrpc socket is released. These don't actually clean up the connections anyway - and further, the local endpoint may still be in use through another socket. (2) Mark the local endpoint as dead when we start the process of tearing it down. (3) When destroying a local endpoint, strip all of its client connections from the idle list and discard the ref on each that the list was holding. (4) When destroying a local endpoint, call the service connection reaper directly (rather than through a workqueue) to immediately kill off all outstanding service connections. (5) Make the service connection reaper reap connections for which the local endpoint is marked dead. 
Only after destroying the connections can we close the socket lest we get an oops in a workqueue that's looking at a connection or a peer. Fixes: 3d18cbb7fd0c ("rxrpc: Fix conn expiry timers") Signed-off-by: David Howells Tested-by: Marc Dionne Signed-off-by: David S. Miller --- net/rxrpc/af_rxrpc.c | 3 --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/conn_client.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ net/rxrpc/conn_object.c | 2 +- net/rxrpc/local_object.c | 5 ++++- 5 files changed, 50 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 0dbbfd1b6487..d72ddb67bb74 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -862,7 +862,6 @@ static void rxrpc_sock_destructor(struct sock *sk) static int rxrpc_release_sock(struct sock *sk) { struct rxrpc_sock *rx = rxrpc_sk(sk); - struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); _enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt)); @@ -898,8 +897,6 @@ static int rxrpc_release_sock(struct sock *sk) rxrpc_release_calls_on_socket(rx); flush_workqueue(rxrpc_workqueue); rxrpc_purge_queue(&sk->sk_receive_queue); - rxrpc_queue_work(&rxnet->service_conn_reaper); - rxrpc_queue_work(&rxnet->client_conn_reaper); rxrpc_unuse_local(rx->local); rx->local = NULL; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 852e58781fda..8051dfdcf26d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -910,6 +910,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *); void rxrpc_put_client_conn(struct rxrpc_connection *); void rxrpc_discard_expired_client_conns(struct work_struct *); void rxrpc_destroy_all_client_connections(struct rxrpc_net *); +void rxrpc_clean_up_local_conns(struct rxrpc_local *); /* * conn_event.c diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index aea82f909c60..3f1da1b49f69 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -1162,3 +1162,47 @@ void 
rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet) _leave(""); } + +/* + * Clean up the client connections on a local endpoint. + */ +void rxrpc_clean_up_local_conns(struct rxrpc_local *local) +{ + struct rxrpc_connection *conn, *tmp; + struct rxrpc_net *rxnet = local->rxnet; + unsigned int nr_active; + LIST_HEAD(graveyard); + + _enter(""); + + spin_lock(&rxnet->client_conn_cache_lock); + nr_active = rxnet->nr_active_client_conns; + + list_for_each_entry_safe(conn, tmp, &rxnet->idle_client_conns, + cache_link) { + if (conn->params.local == local) { + ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_IDLE); + + trace_rxrpc_client(conn, -1, rxrpc_client_discard); + if (!test_and_clear_bit(RXRPC_CONN_EXPOSED, &conn->flags)) + BUG(); + conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; + list_move(&conn->cache_link, &graveyard); + nr_active--; + } + } + + rxnet->nr_active_client_conns = nr_active; + spin_unlock(&rxnet->client_conn_cache_lock); + ASSERTCMP(nr_active, >=, 0); + + while (!list_empty(&graveyard)) { + conn = list_entry(graveyard.next, + struct rxrpc_connection, cache_link); + list_del_init(&conn->cache_link); + + rxrpc_put_connection(conn); + } + + _leave(" [culled]"); +} diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 434ef392212b..ed05b6922132 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -398,7 +398,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) continue; - if (rxnet->live) { + if (rxnet->live && !conn->params.local->dead) { idle_timestamp = READ_ONCE(conn->idle_timestamp); expire_at = idle_timestamp + rxrpc_connection_expiry * HZ; if (conn->params.local->service_closed) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 72a6e12a9304..36587260cabd 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -426,11 +426,14 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) _enter("%d", 
local->debug_id); + local->dead = true; + mutex_lock(&rxnet->local_mutex); list_del_init(&local->link); mutex_unlock(&rxnet->local_mutex); - ASSERT(RB_EMPTY_ROOT(&local->client_conns)); + rxrpc_clean_up_local_conns(local); + rxrpc_service_connection_reaper(&rxnet->service_conn_reaper); ASSERT(!local->service); if (socket) { -- cgit v1.2.1 From bee07b33db78d4ee7ed6a2fe810b9473d5471fe4 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 30 Aug 2019 16:04:32 -0700 Subject: mm: memcontrol: flush percpu slab vmstats on kmem offlining I've noticed that the "slab" value in memory.stat is sometimes 0, even if some children memory cgroups have a non-zero "slab" value. The following investigation showed that this is the result of the kmem_cache reparenting in combination with the per-cpu batching of slab vmstats. At the offlining some vmstat value may leave in the percpu cache, not being propagated upwards by the cgroup hierarchy. It means that stats on ancestor levels are lower than actual. Later when slab pages are released, the precise number of pages is substracted on the parent level, making the value negative. We don't show negative values, 0 is printed instead. To fix this issue, let's flush percpu slab memcg and lruvec stats on memcg offlining. This guarantees that numbers on all ancestor levels are accurate and match the actual number of outstanding slab pages. 
Link: http://lkml.kernel.org/r/20190819202338.363363-3-guro@fb.com Fixes: fb2f2b0adb98 ("mm: memcg/slab: reparent memcg kmem_caches on cgroup removal") Signed-off-by: Roman Gushchin Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 5 +++-- mm/memcontrol.c | 35 +++++++++++++++++++++++++++-------- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d77d717c620c..3f38c30d2f13 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -215,8 +215,9 @@ enum node_stat_item { NR_INACTIVE_FILE, /* " " " " " */ NR_ACTIVE_FILE, /* " " " " " */ NR_UNEVICTABLE, /* " " " " " */ - NR_SLAB_RECLAIMABLE, - NR_SLAB_UNRECLAIMABLE, + NR_SLAB_RECLAIMABLE, /* Please do not reorder this item */ + NR_SLAB_UNRECLAIMABLE, /* and this one without looking at + * memcg_flush_percpu_vmstats() first. */ NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ WORKINGSET_NODES, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 26e2999af608..1f585d6c77c1 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3260,37 +3260,49 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, } } -static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg) +static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg, bool slab_only) { unsigned long stat[MEMCG_NR_STAT]; struct mem_cgroup *mi; int node, cpu, i; + int min_idx, max_idx; - for (i = 0; i < MEMCG_NR_STAT; i++) + if (slab_only) { + min_idx = NR_SLAB_RECLAIMABLE; + max_idx = NR_SLAB_UNRECLAIMABLE; + } else { + min_idx = 0; + max_idx = MEMCG_NR_STAT; + } + + for (i = min_idx; i < max_idx; i++) stat[i] = 0; for_each_online_cpu(cpu) - for (i = 0; i < MEMCG_NR_STAT; i++) + for (i = min_idx; i < max_idx; i++) stat[i] += raw_cpu_read(memcg->vmstats_percpu->stat[i]); for (mi = memcg; mi; mi = 
parent_mem_cgroup(mi)) - for (i = 0; i < MEMCG_NR_STAT; i++) + for (i = min_idx; i < max_idx; i++) atomic_long_add(stat[i], &mi->vmstats[i]); + if (!slab_only) + max_idx = NR_VM_NODE_STAT_ITEMS; + for_each_node(node) { struct mem_cgroup_per_node *pn = memcg->nodeinfo[node]; struct mem_cgroup_per_node *pi; - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) + for (i = min_idx; i < max_idx; i++) stat[i] = 0; for_each_online_cpu(cpu) - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) + for (i = min_idx; i < max_idx; i++) stat[i] += raw_cpu_read( pn->lruvec_stat_cpu->count[i]); for (pi = pn; pi; pi = parent_nodeinfo(pi, node)) - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) + for (i = min_idx; i < max_idx; i++) atomic_long_add(stat[i], &pi->lruvec_stat[i]); } } @@ -3363,7 +3375,14 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg) if (!parent) parent = root_mem_cgroup; + /* + * Deactivate and reparent kmem_caches. Then flush percpu + * slab statistics to have precise values at the parent and + * all ancestor levels. It's required to keep slab stats + * accurate after the reparenting of kmem_caches. + */ memcg_deactivate_kmem_caches(memcg, parent); + memcg_flush_percpu_vmstats(memcg, true); kmemcg_id = memcg->kmemcg_id; BUG_ON(kmemcg_id < 0); @@ -4740,7 +4759,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) * Flush percpu vmstats and vmevents to guarantee the value correctness * on parent's and all ancestor levels. 
*/ - memcg_flush_percpu_vmstats(memcg); + memcg_flush_percpu_vmstats(memcg, false); memcg_flush_percpu_vmevents(memcg); for_each_node(node) free_mem_cgroup_per_node_info(memcg, node); -- cgit v1.2.1 From 441e254cd40dc03beec3c650ce6ce6074bc6517f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 30 Aug 2019 16:04:35 -0700 Subject: mm/zsmalloc.c: fix build when CONFIG_COMPACTION=n Fixes: 701d678599d0c1 ("mm/zsmalloc.c: fix race condition in zs_destroy_pool") Link: http://lkml.kernel.org/r/201908251039.5oSbEEUT%25lkp@intel.com Reported-by: kbuild test robot Cc: Sergey Senozhatsky Cc: Henry Burns Cc: Minchan Kim Cc: Shakeel Butt Cc: Jonathan Adams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/zsmalloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 08def3a0d200..e98bb6ab4f7e 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -2412,7 +2412,9 @@ struct zs_pool *zs_create_pool(const char *name) if (!pool->name) goto err; +#ifdef CONFIG_COMPACTION init_waitqueue_head(&pool->migration_wait); +#endif if (create_cache(pool)) goto err; -- cgit v1.2.1 From b4c46484dc3fa3721d68fdfae85c1d7b1f6b5472 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 30 Aug 2019 16:04:39 -0700 Subject: mm, memcg: partially revert "mm/memcontrol.c: keep local VM counters in sync with the hierarchical ones" Commit 766a4c19d880 ("mm/memcontrol.c: keep local VM counters in sync with the hierarchical ones") effectively decreased the precision of per-memcg vmstats_local and per-memcg-per-node lruvec percpu counters. That's good for displaying in memory.stat, but brings a serious regression into the reclaim process. One issue I've discovered and debugged is the following: lruvec_lru_size() can return 0 instead of the actual number of pages in the lru list, preventing the kernel to reclaim last remaining pages. Result is yet another dying memory cgroups flooding. 
The opposite also happens: scanning an empty lru list is a waste of cpu time.
R. Silva" Date: Fri, 30 Aug 2019 16:04:43 -0700 Subject: mm/z3fold.c: fix lock/unlock imbalance in z3fold_page_isolate Fix lock/unlock imbalance by unlocking *zhdr* before return. Addresses Coverity ID 1452811 ("Missing unlock") Link: http://lkml.kernel.org/r/20190826030634.GA4379@embeddedor Fixes: d776aaa9895e ("mm/z3fold.c: fix race between migration and destruction") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Andrew Morton Cc: Henry Burns Cc: Vitaly Wool Cc: Shakeel Butt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/z3fold.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/z3fold.c b/mm/z3fold.c index e31cd9bd4ed5..75b7962439ff 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -1406,6 +1406,7 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode) * should freak out. */ WARN(1, "Z3fold is experiencing kref problems\n"); + z3fold_page_unlock(zhdr); return false; } z3fold_page_unlock(zhdr); -- cgit v1.2.1 From a6c135bb1a59b5d67c8c45b214d3427d65dd7c00 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 30 Aug 2019 16:04:46 -0700 Subject: mailmap: add aliases for Dmitry Safonov I don't work for Virtuozzo or Samsung anymore and I've noticed that they have started sending annoying html email-replies. And I prioritize my personal emails over work email box, so while at it add an entry for Arista too - so I can reply faster when needed. Link: http://lkml.kernel.org/r/20190827220346.11123-1-dima@arista.com Signed-off-by: Dmitry Safonov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.mailmap b/.mailmap index ebdca3fba91f..afaad605284a 100644 --- a/.mailmap +++ b/.mailmap @@ -64,6 +64,9 @@ Dengcheng Zhu Dengcheng Zhu Dengcheng Zhu Dmitry Eremin-Solenikov +Dmitry Safonov <0x7f454c46@gmail.com> +Dmitry Safonov <0x7f454c46@gmail.com> +Dmitry Safonov <0x7f454c46@gmail.com> Domen Puncer Douglas Gilbert Ed L. 
Cashin -- cgit v1.2.1 From d2e5fb927ee7f52c1fe2a98b554881e9dffd8514 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 30 Aug 2019 16:04:50 -0700 Subject: mm, memcg: do not set reclaim_state on soft limit reclaim Adric Blake has noticed[1] the following warning: WARNING: CPU: 7 PID: 175 at mm/vmscan.c:245 set_task_reclaim_state+0x1e/0x40 [...] Call Trace: mem_cgroup_shrink_node+0x9b/0x1d0 mem_cgroup_soft_limit_reclaim+0x10c/0x3a0 balance_pgdat+0x276/0x540 kswapd+0x200/0x3f0 ? wait_woken+0x80/0x80 kthread+0xfd/0x130 ? balance_pgdat+0x540/0x540 ? kthread_park+0x80/0x80 ret_from_fork+0x35/0x40 ---[ end trace 727343df67b2398a ]--- which tells us that soft limit reclaim is about to overwrite the reclaim_state configured up in the call chain (kswapd in this case but the direct reclaim is equally possible). This means that reclaim stats would get misleading once the soft reclaim returns and another reclaim is done. Fix the warning by dropping set_task_reclaim_state from the soft reclaim which is always called with reclaim_state set up. [1] http://lkml.kernel.org/r/CAE1jjeePxYPvw1mw2B3v803xHVR_BNnz0hQUY_JDMN8ny29M6w@mail.gmail.com Link: http://lkml.kernel.org/r/20190828071808.20410-1-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Adric Blake Acked-by: Yafang Shao Acked-by: Yang Shi Cc: Johannes Weiner Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index c77d1e3761a7..a6c5d0b28321 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3220,6 +3220,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #ifdef CONFIG_MEMCG +/* Only used by soft limit reclaim. Do not reuse for anything else. 
*/ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, gfp_t gfp_mask, bool noswap, pg_data_t *pgdat, @@ -3235,7 +3236,8 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, }; unsigned long lru_pages; - set_task_reclaim_state(current, &sc.reclaim_state); + WARN_ON_ONCE(!current->reclaim_state); + sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); @@ -3253,7 +3255,6 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); - set_task_reclaim_state(current, NULL); *nr_scanned = sc.nr_scanned; return sc.nr_reclaimed; -- cgit v1.2.1 From 6c1c280805ded72eceb2afc1a0d431b256608554 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Fri, 30 Aug 2019 16:04:53 -0700 Subject: mm: memcontrol: fix percpu vmstats and vmevents flush Instead of using raw_cpu_read() use per_cpu() to read the actual data of the corresponding cpu otherwise we will be reading the data of the current cpu for the number of online CPUs. 
Link: http://lkml.kernel.org/r/20190829203110.129263-1-shakeelb@google.com Fixes: bb65f89b7d3d ("mm: memcontrol: flush percpu vmevents before releasing memcg") Fixes: c350a99ea2b1 ("mm: memcontrol: flush percpu vmstats before releasing memcg") Signed-off-by: Shakeel Butt Acked-by: Roman Gushchin Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a247cb163245..9ec5e12486a7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3278,7 +3278,7 @@ static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg, bool slab_only) for_each_online_cpu(cpu) for (i = min_idx; i < max_idx; i++) - stat[i] += raw_cpu_read(memcg->vmstats_percpu->stat[i]); + stat[i] += per_cpu(memcg->vmstats_percpu->stat[i], cpu); for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) for (i = min_idx; i < max_idx; i++) @@ -3296,8 +3296,8 @@ static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg, bool slab_only) for_each_online_cpu(cpu) for (i = min_idx; i < max_idx; i++) - stat[i] += raw_cpu_read( - pn->lruvec_stat_cpu->count[i]); + stat[i] += per_cpu( + pn->lruvec_stat_cpu->count[i], cpu); for (pi = pn; pi; pi = parent_nodeinfo(pi, node)) for (i = min_idx; i < max_idx; i++) @@ -3316,8 +3316,8 @@ static void memcg_flush_percpu_vmevents(struct mem_cgroup *memcg) for_each_online_cpu(cpu) for (i = 0; i < NR_VM_EVENT_ITEMS; i++) - events[i] += raw_cpu_read( - memcg->vmstats_percpu->events[i]); + events[i] += per_cpu(memcg->vmstats_percpu->events[i], + cpu); for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) for (i = 0; i < NR_VM_EVENT_ITEMS; i++) -- cgit v1.2.1 From ab9bb6318b0967671e0c9b6537c1537d51ca4f45 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 30 Aug 2019 18:47:15 -0700 Subject: Partially revert "kfifo: fix kfifo_alloc() and kfifo_init()" Commit dfe2a77fd243 ("kfifo: fix 
kfifo_alloc() and kfifo_init()") made the kfifo code round the number of elements up. That was good for __kfifo_alloc(), but it's actually wrong for __kfifo_init(). The difference? __kfifo_alloc() will allocate the rounded-up number of elements, but __kfifo_init() uses an allocation done by the caller. We can't just say "use more elements than the caller allocated", and have to round down. The good news? All the normal cases will be using power-of-two arrays anyway, and most users of kfifo's don't use kfifo_init() at all, but one of the helper macros to declare a KFIFO that enforce the proper power-of-two behavior. But it looks like at least ibmvscsis might be affected. The bad news? Will Deacon refers to an old thread and points points out that the memory ordering in kfifo's is questionable. See https://lore.kernel.org/lkml/20181211034032.32338-1-yuleixzhang@tencent.com/ for more. Fixes: dfe2a77fd243 ("kfifo: fix kfifo_alloc() and kfifo_init()") Reported-by: laokz Cc: Stefani Seibold Cc: Andrew Morton Cc: Dan Carpenter Cc: Greg KH Cc: Kees Cook Cc: Will Deacon Signed-off-by: Linus Torvalds --- lib/kfifo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/kfifo.c b/lib/kfifo.c index 117ad0e7fbf4..70dab9ac7827 100644 --- a/lib/kfifo.c +++ b/lib/kfifo.c @@ -68,7 +68,8 @@ int __kfifo_init(struct __kfifo *fifo, void *buffer, { size /= esize; - size = roundup_pow_of_two(size); + if (!is_power_of_2(size)) + size = rounddown_pow_of_two(size); fifo->in = 0; fifo->out = 0; -- cgit v1.2.1 From 595a438c78dbdc43d6c9db4f437267f0bd1548bf Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Thu, 4 Jul 2019 20:21:10 +0300 Subject: tracing: Make exported ftrace_set_clr_event non-static The function ftrace_set_clr_event is declared static and marked EXPORT_SYMBOL_GPL(), which is at best an odd combination. Because the function was decided to be a part of API, this commit removes the static attribute and adds the declaration to the header. 
Link: http://lkml.kernel.org/r/20190704172110.27041-1-efremov@linux.com Fixes: f45d1225adb04 ("tracing: Kernel access to Ftrace instances") Reviewed-by: Joe Jin Signed-off-by: Denis Efremov Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 1 + kernel/trace/trace_events.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 5150436783e8..30a8cdcfd4a4 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -548,6 +548,7 @@ extern int trace_event_get_offsets(struct trace_event_call *call); #define is_signed_type(type) (((type)(-1)) < (type)1) +int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); int trace_set_clr_event(const char *system, const char *event, int set); /* diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index c7506bc81b75..648930823b57 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -787,7 +787,7 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, return ret; } -static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) +int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) { char *event = NULL, *sub = NULL, *match; int ret; -- cgit v1.2.1 From 19a58ce1dc72264b9d50ff6d86cc36b3c439fb64 Mon Sep 17 00:00:00 2001 From: Xinpeng Liu Date: Thu, 8 Aug 2019 07:29:23 +0800 Subject: tracing/probe: Fix null pointer dereference BUG: KASAN: null-ptr-deref in trace_probe_cleanup+0x8d/0xd0 Read of size 8 at addr 0000000000000000 by task syz-executor.0/9746 trace_probe_cleanup+0x8d/0xd0 free_trace_kprobe.part.14+0x15/0x50 alloc_trace_kprobe+0x23e/0x250 Link: http://lkml.kernel.org/r/1565220563-980-1-git-send-email-danielliu861@gmail.com Fixes: e3dc9f898ef9c ("tracing/probe: Add trace_event_call accesses APIs") Signed-off-by: Xinpeng Liu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_probe.c | 3 ++- 1 
file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index dbef0d135075..fb6bfbc5bf86 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -895,7 +895,8 @@ void trace_probe_cleanup(struct trace_probe *tp) for (i = 0; i < tp->nr_args; i++) traceprobe_free_probe_arg(&tp->args[i]); - kfree(call->class->system); + if (call->class) + kfree(call->class->system); kfree(call->name); kfree(call->print_fmt); } -- cgit v1.2.1 From 2e815627318910fb2ab004670a83ba27ac2228b6 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 26 Aug 2019 09:13:12 +0000 Subject: ftrace/x86: Remove mcount() declaration Commit 562e14f72292 ("ftrace/x86: Remove mcount support") removed the support for using mcount, so we could remove the mcount() declaration to clean up. Link: http://lkml.kernel.org/r/20190826170150.10f101ba@xhacker.debian Signed-off-by: Jisheng Zhang Signed-off-by: Steven Rostedt (VMware) --- arch/x86/include/asm/ftrace.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 287f1f7b2e52..c38a66661576 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -16,7 +16,6 @@ #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR #ifndef __ASSEMBLY__ -extern void mcount(void); extern atomic_t modifying_ftrace_code; extern void __fentry__(void); -- cgit v1.2.1 From c68c9ec1c52e5bcd221eb09bc5344ad4f407b204 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 27 Aug 2019 22:25:47 -0700 Subject: tracing: Correct kdoc formats Fix the following kdoc warnings: kernel/trace/trace.c:1579: warning: Function parameter or member 'tr' not described in 'update_max_tr_single' kernel/trace/trace.c:1579: warning: Function parameter or member 'tsk' not described in 'update_max_tr_single' kernel/trace/trace.c:1579: warning: Function parameter or member 'cpu' not described in 'update_max_tr_single' kernel/trace/trace.c:1776: warning: Function 
parameter or member 'type' not described in 'register_tracer' kernel/trace/trace.c:2239: warning: Function parameter or member 'task' not described in 'tracing_record_taskinfo' kernel/trace/trace.c:2239: warning: Function parameter or member 'flags' not described in 'tracing_record_taskinfo' kernel/trace/trace.c:2269: warning: Function parameter or member 'prev' not described in 'tracing_record_taskinfo_sched_switch' kernel/trace/trace.c:2269: warning: Function parameter or member 'next' not described in 'tracing_record_taskinfo_sched_switch' kernel/trace/trace.c:2269: warning: Function parameter or member 'flags' not described in 'tracing_record_taskinfo_sched_switch' kernel/trace/trace.c:3078: warning: Function parameter or member 'ip' not described in 'trace_vbprintk' kernel/trace/trace.c:3078: warning: Function parameter or member 'fmt' not described in 'trace_vbprintk' kernel/trace/trace.c:3078: warning: Function parameter or member 'args' not described in 'trace_vbprintk' Link: http://lkml.kernel.org/r/20190828052549.2472-2-jakub.kicinski@netronome.com Signed-off-by: Jakub Kicinski Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 525a97fbbc60..563e80f9006a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1567,9 +1567,9 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, /** * update_max_tr_single - only copy one trace over, and reset the rest - * @tr - tracer - * @tsk - task with the latency - * @cpu - the cpu of the buffer to copy. + * @tr: tracer + * @tsk: task with the latency + * @cpu: the cpu of the buffer to copy. * * Flip the trace of a single CPU buffer between the @tr and the max_tr. */ @@ -1767,7 +1767,7 @@ static void __init apply_trace_boot_options(void); /** * register_tracer - register a tracer with the ftrace system. 
- * @type - the plugin for the tracer + * @type: the plugin for the tracer * * Register a new plugin tracer. */ @@ -2230,9 +2230,9 @@ static bool tracing_record_taskinfo_skip(int flags) /** * tracing_record_taskinfo - record the task info of a task * - * @task - task to record - * @flags - TRACE_RECORD_CMDLINE for recording comm - * - TRACE_RECORD_TGID for recording tgid + * @task: task to record + * @flags: TRACE_RECORD_CMDLINE for recording comm + * TRACE_RECORD_TGID for recording tgid */ void tracing_record_taskinfo(struct task_struct *task, int flags) { @@ -2258,10 +2258,10 @@ void tracing_record_taskinfo(struct task_struct *task, int flags) /** * tracing_record_taskinfo_sched_switch - record task info for sched_switch * - * @prev - previous task during sched_switch - * @next - next task during sched_switch - * @flags - TRACE_RECORD_CMDLINE for recording comm - * TRACE_RECORD_TGID for recording tgid + * @prev: previous task during sched_switch + * @next: next task during sched_switch + * @flags: TRACE_RECORD_CMDLINE for recording comm + * TRACE_RECORD_TGID for recording tgid */ void tracing_record_taskinfo_sched_switch(struct task_struct *prev, struct task_struct *next, int flags) @@ -3072,7 +3072,9 @@ static void trace_printk_start_stop_comm(int enabled) /** * trace_vbprintk - write binary msg to tracing buffer - * + * @ip: The address of the caller + * @fmt: The string format to write to the buffer + * @args: Arguments for @fmt */ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) { -- cgit v1.2.1 From adb8049097a9ec4acd09fbd3aa8636199a78df8a Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 16 Sep 2018 16:05:53 +0100 Subject: tools/power x86_energy_perf_policy: Fix "uninitialized variable" warnings at -O2 x86_energy_perf_policy first uses __get_cpuid() to check the maximum CPUID level and exits if it is too low. It then assumes that later calls will succeed (which I think is architecturally guaranteed). 
It also assumes that CPUID works at all (which is not guaranteed on x86_32). If optimisations are enabled, gcc warns about potentially uninitialized variables. Fix this by adding an exit-on-error after every call to __get_cpuid() instead of just checking the maximum level. Signed-off-by: Ben Hutchings Signed-off-by: Len Brown --- .../x86_energy_perf_policy.c | 26 +++++++++++++--------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 34a796b303fe..7663abef51e9 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -1259,6 +1259,15 @@ void probe_dev_msr(void) if (system("/sbin/modprobe msr > /dev/null 2>&1")) err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); } + +static void get_cpuid_or_exit(unsigned int leaf, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + if (!__get_cpuid(leaf, eax, ebx, ecx, edx)) + errx(1, "Processor not supported\n"); +} + /* * early_cpuid() * initialize turbo_is_enabled, has_hwp, has_epb @@ -1266,15 +1275,10 @@ void probe_dev_msr(void) */ void early_cpuid(void) { - unsigned int eax, ebx, ecx, edx, max_level; + unsigned int eax, ebx, ecx, edx; unsigned int fms, family, model; - __get_cpuid(0, &max_level, &ebx, &ecx, &edx); - - if (max_level < 6) - errx(1, "Processor not supported\n"); - - __get_cpuid(1, &fms, &ebx, &ecx, &edx); + get_cpuid_or_exit(1, &fms, &ebx, &ecx, &edx); family = (fms >> 8) & 0xf; model = (fms >> 4) & 0xf; if (family == 6 || family == 0xf) @@ -1288,7 +1292,7 @@ void early_cpuid(void) bdx_highest_ratio = msr & 0xFF; } - __get_cpuid(0x6, &eax, &ebx, &ecx, &edx); + get_cpuid_or_exit(0x6, &eax, &ebx, &ecx, &edx); turbo_is_enabled = (eax >> 1) & 1; has_hwp = (eax >> 7) & 1; has_epb = (ecx >> 3) & 1; @@ -1306,7 +1310,7 @@ void parse_cpuid(void) 
eax = ebx = ecx = edx = 0; - __get_cpuid(0, &max_level, &ebx, &ecx, &edx); + get_cpuid_or_exit(0, &max_level, &ebx, &ecx, &edx); if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) genuine_intel = 1; @@ -1315,7 +1319,7 @@ void parse_cpuid(void) fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", (char *)&ebx, (char *)&edx, (char *)&ecx); - __get_cpuid(1, &fms, &ebx, &ecx, &edx); + get_cpuid_or_exit(1, &fms, &ebx, &ecx, &edx); family = (fms >> 8) & 0xf; model = (fms >> 4) & 0xf; stepping = fms & 0xf; @@ -1340,7 +1344,7 @@ void parse_cpuid(void) errx(1, "CPUID: no MSR"); - __get_cpuid(0x6, &eax, &ebx, &ecx, &edx); + get_cpuid_or_exit(0x6, &eax, &ebx, &ecx, &edx); /* turbo_is_enabled already set */ /* has_hwp already set */ has_hwp_notify = eax & (1 << 8); -- cgit v1.2.1 From 6ac1730f7db86f0a92d4de0f2b4ca9cd124080fe Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 16 Sep 2018 16:06:10 +0100 Subject: tools/power/x86: Enable compiler optimisations and Fortify by default Compiling without optimisations is silly, especially since some warnings depend on the optimiser. Use -O2. Fortify adds warnings for unchecked I/O (among other things), which seems to be a good idea for user-space code. Enable that too. 
Signed-off-by: Ben Hutchings Signed-off-by: Len Brown --- tools/power/x86/turbostat/Makefile | 3 ++- tools/power/x86/x86_energy_perf_policy/Makefile | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index 045f5f7d68ab..13f1e8b9ac52 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -9,9 +9,10 @@ ifeq ("$(origin O)", "command line") endif turbostat : turbostat.c -override CFLAGS += -Wall -I../../../include +override CFLAGS += -O2 -Wall -I../../../include override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"' +override CFLAGS += -D_FORTIFY_SOURCE=2 %: %.c @mkdir -p $(BUILD_OUTPUT) diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile index 1fdeef864e7c..666b325a62a2 100644 --- a/tools/power/x86/x86_energy_perf_policy/Makefile +++ b/tools/power/x86/x86_energy_perf_policy/Makefile @@ -9,8 +9,9 @@ ifeq ("$(origin O)", "command line") endif x86_energy_perf_policy : x86_energy_perf_policy.c -override CFLAGS += -Wall -I../../../include +override CFLAGS += -O2 -Wall -I../../../include override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' +override CFLAGS += -D_FORTIFY_SOURCE=2 %: %.c @mkdir -p $(BUILD_OUTPUT) -- cgit v1.2.1 From f3fe116a44fd02bc65dd312969697d06ca86b730 Mon Sep 17 00:00:00 2001 From: Matt Lupfer Date: Thu, 20 Sep 2018 10:31:44 -0400 Subject: tools/power: Fix typo in man page From context, we mean EPB (Energy Performance Bias).
Signed-off-by: Matt Lupfer Signed-off-by: Len Brown --- tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 index 17db1c3af4d0..78c6361898b1 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 @@ -40,7 +40,7 @@ in the same processor package. Hardware P-States (HWP) are effectively an expansion of hardware P-state control from the opportunistic turbo-mode P-state range to include the entire range of available P-states. -On Broadwell Xeon, the initial HWP implementation, EBP influenced HWP. +On Broadwell Xeon, the initial HWP implementation, EPB influenced HWP. That influence was removed in subsequent generations, where it was moved to the Energy_Performance_Preference (EPP) field in -- cgit v1.2.1 From 03531482402a2bc4ab93cf6dde46833775e035e9 Mon Sep 17 00:00:00 2001 From: "Zephaniah E. Loss-Cutler-Hull" Date: Sat, 9 Feb 2019 05:25:48 -0800 Subject: tools/power x86_energy_perf_policy: Fix argument parsing The -w argument in x86_energy_perf_policy currently triggers an unconditional segfault. This is because the argument string reads: "+a:c:dD:E:e:f:m:M:rt:u:vw" and yet the argument handler expects an argument. When parse_optarg_string is called with a null argument, we then proceed to crash in strncmp, not horribly friendly. The man page describes -w as taking an argument, the long form (--hwp-window) is correctly marked as taking a required argument, and the code expects it. As such, this patch simply marks the short form (-w) as requiring an argument. Signed-off-by: Zephaniah E. 
Loss-Cutler-Hull Signed-off-by: Len Brown --- tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 7663abef51e9..3fe1eed900d4 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -545,7 +545,7 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+a:c:dD:E:e:f:m:M:rt:u:vw", + while ((opt = getopt_long_only(argc, argv, "+a:c:dD:E:e:f:m:M:rt:u:vw:", long_options, &option_index)) != -1) { switch (opt) { case 'a': -- cgit v1.2.1 From d743dae6d1936160366a32f3400f03db1da9421b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 31 Aug 2019 12:30:24 -0400 Subject: tools/power turbostat: remove duplicate pc10 column Remove the duplicate pc10 column. 
Fixes: be0e54c4ebbf ("turbostat: Build-in "Low Power Idle" counters support") Reported-by: Naoya Horiguchi Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 75fc4fb9901c..90f7e8b4d4d4 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -849,7 +849,6 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "pc8: %016llX\n", p->pc8); outp += sprintf(outp, "pc9: %016llX\n", p->pc9); outp += sprintf(outp, "pc10: %016llX\n", p->pc10); - outp += sprintf(outp, "pc10: %016llX\n", p->pc10); outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi); outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi); outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg); -- cgit v1.2.1 From d4794f25f122aa1a8a073be51112edaa723ffff4 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 25 Mar 2019 17:32:42 +0000 Subject: tools/power turbostat: Make interval calculation per thread to reduce jitter Turbostat currently normalizes TSC and other values by dividing by an interval. This interval is the delta between the start of one global (all counters on all CPUs) sampling and the start of another. However, this introduces a lot of jitter into the data. In order to reduce jitter, the interval calculation should be based on timestamps taken per thread and close to the start of the thread's sampling. Define a per thread time value to hold the delta between samples taken on the thread. Use the timestamp taken at the beginning of sampling to calculate the delta. Move the thread's beginning timestamp to after the CPU migration to avoid jitter due to the migration. Use the global time delta for the average time delta. 
Signed-off-by: Yazen Ghannam Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 90f7e8b4d4d4..02813a2a8ffd 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -166,6 +166,7 @@ size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size; struct thread_data { struct timeval tv_begin; struct timeval tv_end; + struct timeval tv_delta; unsigned long long tsc; unsigned long long aperf; unsigned long long mperf; @@ -910,7 +911,7 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_TOD)) outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec); - interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; + interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec/1000000.0; tsc = t->tsc * tsc_tweak; @@ -1308,6 +1309,7 @@ delta_thread(struct thread_data *new, struct thread_data *old, * over-write old w/ new so we can print end of interval values */ + timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta); old->tv_begin = new->tv_begin; old->tv_end = new->tv_end; @@ -1403,6 +1405,8 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data t->tv_begin.tv_usec = 0; t->tv_end.tv_sec = 0; t->tv_end.tv_usec = 0; + t->tv_delta.tv_sec = 0; + t->tv_delta.tv_usec = 0; t->tsc = 0; t->aperf = 0; @@ -1572,6 +1576,9 @@ void compute_average(struct thread_data *t, struct core_data *c, for_all_cpus(sum_counters, t, c, p); + /* Use the global time delta for the average. 
*/ + average.threads.tv_delta = tv_delta; + average.threads.tsc /= topo.num_cpus; average.threads.aperf /= topo.num_cpus; average.threads.mperf /= topo.num_cpus; @@ -1761,13 +1768,13 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) struct msr_counter *mp; int i; - gettimeofday(&t->tv_begin, (struct timezone *)NULL); - if (cpu_migrate(cpu)) { fprintf(outf, "Could not migrate to CPU %d\n", cpu); return -1; } + gettimeofday(&t->tv_begin, (struct timezone *)NULL); + if (first_counter_read) get_apic_id(t); retry: -- cgit v1.2.1 From 15423b958f33132152e209e98df0dedc7a78f22c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 8 Apr 2019 10:00:44 +0100 Subject: tools/power turbostat: fix leak of file descriptor on error return path Currently the error return path does not close the file fp and leaks a file descriptor. Fix this by closing the file. Fixes: 5ea7647b333f ("tools/power turbostat: Warn on bad ACPI LPIT data") Signed-off-by: Colin Ian King Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 02813a2a8ffd..41cf1206273c 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2944,6 +2944,7 @@ int snapshot_sys_lpi_us(void) if (retval != 1) { fprintf(stderr, "Disabling Low Power Idle System output\n"); BIC_NOT_PRESENT(BIC_SYS_LPI); + fclose(fp); return -1; } fclose(fp); -- cgit v1.2.1 From 605736c6929d541c78a85dffae4d33a23b6b2149 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 8 Apr 2019 11:12:40 -0500 Subject: tools/power turbostat: fix file descriptor leaks Fix file descriptor leaks by closing fp before return. Addresses-Coverity-ID: 1444591 ("Resource leak") Addresses-Coverity-ID: 1444592 ("Resource leak") Fixes: 5ea7647b333f ("tools/power turbostat: Warn on bad ACPI LPIT data") Signed-off-by: Gustavo A. R. 
Silva Reviewed-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 41cf1206273c..2fb5c155289b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2918,6 +2918,7 @@ int snapshot_cpu_lpi_us(void) if (retval != 1) { fprintf(stderr, "Disabling Low Power Idle CPU output\n"); BIC_NOT_PRESENT(BIC_CPU_LPI); + fclose(fp); return -1; } -- cgit v1.2.1 From eeb71c950bc6eee460f2070643ce137e067b234c Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 3 Apr 2019 16:02:14 +0900 Subject: tools/power turbostat: fix buffer overrun turbostat could be terminated by general protection fault on some latest hardwares which (for example) support 9 levels of C-states and show 18 "tADDED" lines. That bloats the total output and finally causes buffer overrun. So let's extend the buffer to avoid this. Signed-off-by: Naoya Horiguchi Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 2fb5c155289b..f8f4e1c130a6 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5131,7 +5131,7 @@ int initialize_counters(int cpu_id) void allocate_output_buffer() { - output_buffer = calloc(1, (1 + topo.num_cpus) * 1024); + output_buffer = calloc(1, (1 + topo.num_cpus) * 2048); outp = output_buffer; if (outp == NULL) err(-1, "calloc output buffer"); -- cgit v1.2.1 From b62b3184576b8f87ca655dd9bfd1ae02fd4e50a5 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Sun, 21 Apr 2019 16:30:22 +0800 Subject: tools/power turbostat: add Jacobsville support Jacobsville behaves like Denverton. 
Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index f8f4e1c130a6..35f4366a522e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4591,6 +4591,9 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_ICELAKE_MOBILE: return INTEL_FAM6_CANNONLAKE_MOBILE; + + case INTEL_FAM6_ATOM_TREMONT_X: + return INTEL_FAM6_ATOM_GOLDMONT_X; } return model; } -- cgit v1.2.1 From cd188af5282d9f9e65f63915b13239bafc746f8d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 31 Aug 2019 14:09:29 -0400 Subject: tools/power turbostat: Fix Haswell Core systems turbostat: cpu0: msr offset 0x630 read failed: Input/output error because Haswell Core does not have C8-C10. Output C8-C10 only on Haswell ULT. Fixes: f5a4c76ad7de ("tools/power turbostat: consolidate duplicate model numbers") Reported-by: Prarit Bhargava Suggested-by: Kosuke Tatsukawa Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 35f4366a522e..78e7c94b94bf 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3217,6 +3217,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) break; case INTEL_FAM6_HASWELL_CORE: /* HSW */ case INTEL_FAM6_HASWELL_X: /* HSX */ + case INTEL_FAM6_HASWELL_ULT: /* HSW */ case INTEL_FAM6_HASWELL_GT3E: /* HSW */ case INTEL_FAM6_BROADWELL_CORE: /* BDW */ case INTEL_FAM6_BROADWELL_GT3E: /* BDW */ @@ -3413,6 +3414,7 @@ int has_config_tdp(unsigned int family, unsigned int model) case INTEL_FAM6_IVYBRIDGE: /* IVB */ case INTEL_FAM6_HASWELL_CORE: /* HSW */ case INTEL_FAM6_HASWELL_X: /* HSX */ + case INTEL_FAM6_HASWELL_ULT: /* HSW */ case 
INTEL_FAM6_HASWELL_GT3E: /* HSW */ case INTEL_FAM6_BROADWELL_CORE: /* BDW */ case INTEL_FAM6_BROADWELL_GT3E: /* BDW */ @@ -3849,6 +3851,7 @@ void rapl_probe_intel(unsigned int family, unsigned int model) case INTEL_FAM6_SANDYBRIDGE: case INTEL_FAM6_IVYBRIDGE: case INTEL_FAM6_HASWELL_CORE: /* HSW */ + case INTEL_FAM6_HASWELL_ULT: /* HSW */ case INTEL_FAM6_HASWELL_GT3E: /* HSW */ case INTEL_FAM6_BROADWELL_CORE: /* BDW */ case INTEL_FAM6_BROADWELL_GT3E: /* BDW */ @@ -4040,6 +4043,7 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model) switch (model) { case INTEL_FAM6_HASWELL_CORE: /* HSW */ + case INTEL_FAM6_HASWELL_ULT: /* HSW */ case INTEL_FAM6_HASWELL_GT3E: /* HSW */ do_gfx_perf_limit_reasons = 1; case INTEL_FAM6_HASWELL_X: /* HSX */ @@ -4259,6 +4263,7 @@ int has_snb_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ case INTEL_FAM6_HASWELL_CORE: /* HSW */ case INTEL_FAM6_HASWELL_X: /* HSW */ + case INTEL_FAM6_HASWELL_ULT: /* HSW */ case INTEL_FAM6_HASWELL_GT3E: /* HSW */ case INTEL_FAM6_BROADWELL_CORE: /* BDW */ case INTEL_FAM6_BROADWELL_GT3E: /* BDW */ @@ -4292,7 +4297,7 @@ int has_hsw_msrs(unsigned int family, unsigned int model) return 0; switch (model) { - case INTEL_FAM6_HASWELL_CORE: + case INTEL_FAM6_HASWELL_ULT: /* HSW */ case INTEL_FAM6_BROADWELL_CORE: /* BDW */ case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */ case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ @@ -4576,9 +4581,6 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_XEON_PHI_KNM: return INTEL_FAM6_XEON_PHI_KNL; - case INTEL_FAM6_HASWELL_ULT: - return INTEL_FAM6_HASWELL_CORE; - case INTEL_FAM6_BROADWELL_X: case INTEL_FAM6_BROADWELL_XEON_D: /* BDX-DE */ return INTEL_FAM6_BROADWELL_X; -- cgit v1.2.1 From 570992fc5733b5e1b00a4bdb9272df1e25d63972 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 31 Aug 2019 14:16:07 -0400 Subject: tools/power turbostat: rename has_hsw_msrs() Perhaps if this more descriptive name had been used, 
then we wouldn't have had the HSW ULT vs HSW CORE bug, fixed by the previous commit. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 78e7c94b94bf..51c739043214 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4280,7 +4280,7 @@ int has_snb_msrs(unsigned int family, unsigned int model) } /* - * HSW adds support for additional MSRs: + * HSW ULT added support for C8/C9/C10 MSRs: * * MSR_PKG_C8_RESIDENCY 0x00000630 * MSR_PKG_C9_RESIDENCY 0x00000631 @@ -4291,7 +4291,7 @@ int has_snb_msrs(unsigned int family, unsigned int model) * MSR_PKGC10_IRTL 0x00000635 * */ -int has_hsw_msrs(unsigned int family, unsigned int model) +int has_c8910_msrs(unsigned int family, unsigned int model) { if (!genuine_intel) return 0; @@ -4833,12 +4833,12 @@ void process_cpuid() BIC_NOT_PRESENT(BIC_CPU_c7); BIC_NOT_PRESENT(BIC_Pkgpc7); } - if (has_hsw_msrs(family, model)) { + if (has_c8910_msrs(family, model)) { BIC_PRESENT(BIC_Pkgpc8); BIC_PRESENT(BIC_Pkgpc9); BIC_PRESENT(BIC_Pkgpc10); } - do_irtl_hsw = has_hsw_msrs(family, model); + do_irtl_hsw = has_c8910_msrs(family, model); if (has_skl_msrs(family, model)) { BIC_PRESENT(BIC_Totl_c0); BIC_PRESENT(BIC_Any_c0); -- cgit v1.2.1 From d93ea567fc4eec2d3581015e23d2c555f8b393ba Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Fri, 14 Jun 2019 13:09:46 +0530 Subject: tools/power turbostat: Add Ice Lake NNPI support This enables turbostat utility on Ice Lake NNPI SoC. 
Link: https://lkml.org/lkml/2019/6/5/1034 Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 51c739043214..393509655449 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4592,6 +4592,7 @@ unsigned int intel_model_duplicates(unsigned int model) return INTEL_FAM6_SKYLAKE_MOBILE; case INTEL_FAM6_ICELAKE_MOBILE: + case INTEL_FAM6_ICELAKE_NNPI: return INTEL_FAM6_CANNONLAKE_MOBILE; case INTEL_FAM6_ATOM_TREMONT_X: -- cgit v1.2.1 From c026c23629b825100fd4b8223227d9a395f9a56b Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 14 Aug 2019 20:12:55 +0300 Subject: tools/power turbostat: read from pipes too Commit '47936f944e78 tools/power turbostat: fix printing on input' make a valid fix, but it completely disabled piped stdin support, which is a valuable use-case. Indeed, if stdin is a pipe, turbostat won't read anything from it, so it becomes impossible to get turbostat output at user-defined moments, instead of the regular intervals. There is no reason why this should works for terminals, but not for pipes. This patch improves the situation. Instead of ignoring pipes, we read data from them but gracefully handle the EOF case. 
Signed-off-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 393509655449..095bd52cc086 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -100,6 +100,7 @@ unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ unsigned int has_misc_feature_control; unsigned int first_counter_read = 1; +int ignore_stdin; #define RAPL_PKG (1 << 0) /* 0x610 MSR_PKG_POWER_LIMIT */ @@ -3013,26 +3014,37 @@ void setup_signal_handler(void) void do_sleep(void) { - struct timeval select_timeout; + struct timeval tout; + struct timespec rest; fd_set readfds; int retval; FD_ZERO(&readfds); FD_SET(0, &readfds); - if (!isatty(fileno(stdin))) { + if (ignore_stdin) { nanosleep(&interval_ts, NULL); return; } - select_timeout = interval_tv; - retval = select(1, &readfds, NULL, NULL, &select_timeout); + tout = interval_tv; + retval = select(1, &readfds, NULL, NULL, &tout); if (retval == 1) { switch (getc(stdin)) { case 'q': exit_requested = 1; break; + case EOF: + /* + * 'stdin' is a pipe closed on the other end. There + * won't be any further input. + */ + ignore_stdin = 1; + /* Sleep the rest of the time */ + rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000); + rest.tv_nsec = (tout.tv_usec % 1000000) * 1000; + nanosleep(&rest, NULL); } /* make sure this manually-invoked interval is at least 1ms long */ nanosleep(&one_msec, NULL); -- cgit v1.2.1 From 6ee9fc63d2e7999f93a466e202ae3b557e9c739c Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 14 Aug 2019 20:12:56 +0300 Subject: tools/power turbostat: do not enforce 1ms Turbostat works by taking a snapshot of counters, sleeping, taking another snapshot, calculating deltas, and printing out the table. 
The sleep time is controlled via the -i option or by the user sending a signal or a character to stdin. In the latter case, turbostat always adds a 1 ms sleep before it reads the counters, in order to avoid larger imprecision in the results it prints. While the 1 ms delay may be a good idea for a "dumb" user, it is a problem for an "aware" user. I do thousands and thousands of measurements over a short period of time (like 2ms), and turbostat unconditionally adds 1ms to my interval, so I cannot get what I really need. This patch removes the unconditional 1ms sleep. This is an expert user tool, after all, and non-experts are unlikely ever to use it in the non-fixed interval mode anyway, so I think it is OK to remove the 1ms delay. Signed-off-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 095bd52cc086..7d72268e546d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -39,7 +39,6 @@ FILE *outf; int *fd_percpu; struct timeval interval_tv = {5, 0}; struct timespec interval_ts = {5, 0}; -struct timespec one_msec = {0, 1000000}; unsigned int num_iterations; unsigned int debug; unsigned int quiet; @@ -2994,8 +2993,6 @@ static void signal_handler (int signal) fprintf(stderr, "SIGUSR1\n"); break; } - /* make sure this manually-invoked interval is at least 1ms long */ - nanosleep(&one_msec, NULL); } void setup_signal_handler(void) @@ -3046,8 +3043,6 @@ void do_sleep(void) rest.tv_nsec = (tout.tv_usec % 1000000) * 1000; nanosleep(&rest, NULL); } - /* make sure this manually-invoked interval is at least 1ms long */ - nanosleep(&one_msec, NULL); } } -- cgit v1.2.1 From 1e9042b9c8d46ada9ee7b3339a31f50d12e5d291 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 27 Aug 2019 10:57:14 -0700 Subject: tools/power turbostat: Fix CPU%C1 display value In some cases
C1% will be wrong value, when platform doesn't have MSR for C1 residency. For example: Core CPU CPU%c1 - - 100.00 0 0 100.00 0 2 100.00 1 1 100.00 1 3 100.00 But adding Busy% will fix this Core CPU Busy% CPU%c1 - - 99.77 0.23 0 0 99.77 0.23 0 2 99.77 0.23 1 1 99.77 0.23 1 3 99.77 0.23 This issue can be reproduced on most of the recent systems including Broadwell, Skylake and later. This is because if we don't select Busy% or Avg_MHz or Bzy_MHz then mperf value will not be read from MSR, so it will be 0. But this is required for C1% calculation when MSR for C1 residency is not present. Same is true for C3, C6 and C7 column selection. So add another define DO_BIC_READ(), which doesn't depend on user column selection and use for mperf, C3, C6 and C7 related counters. So when there is no platform support for C1 residency counters, we still read these counters, if the CPU has support and user selected display of CPU%c1. Signed-off-by: Srinivas Pandruvada Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 7d72268e546d..f57c4023231e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -507,6 +507,7 @@ unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAU unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) +#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) @@ -1287,6 +1288,14 @@ delta_core(struct core_data *new, struct core_data *old) } } +int soft_c1_residency_display(int bic) +{ + if (!DO_BIC(BIC_CPU_c1) 
|| use_c1_residency_msr) + return 0; + + return DO_BIC_READ(bic); +} + /* * old = new - old */ @@ -1323,7 +1332,8 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->c1 = new->c1 - old->c1; - if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) { + if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || + soft_c1_residency_display(BIC_Avg_MHz)) { if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { old->aperf = new->aperf - old->aperf; old->mperf = new->mperf - old->mperf; @@ -1780,7 +1790,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) retry: t->tsc = rdtsc(); /* we are running on local CPU of interest */ - if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) { + if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || + soft_c1_residency_display(BIC_Avg_MHz)) { unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; /* @@ -1857,20 +1868,20 @@ retry: if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (DO_BIC(BIC_CPU_c3)) { + if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) { if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) return -6; } - if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) { + if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) { if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) return -7; - } else if (do_knl_cstates) { + } else if (do_knl_cstates || soft_c1_residency_display(BIC_CPU_c6)) { if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6)) return -7; } - if (DO_BIC(BIC_CPU_c7)) + if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) return -8; -- cgit v1.2.1 From 9cfa8e042f7cbb1994cc5923e46c78b36f6054f4 Mon Sep 17 00:00:00 2001 From: Pu Wen Date: Sat, 31 Aug 2019 10:19:58 +0800 Subject: tools/power turbostat: Fix caller parameter of get_tdp_amd() Commit 9392bd98bba760be96ee ("tools/power 
turbostat: Add support for AMD Fam 17h (Zen) RAPL") added the function get_tdp_amd(), whose parameter is the CPU family. But the rapl_probe_amd() function passes the model instead. Fix the caller of get_tdp_amd() to pass family. Cc: # v5.1+ Signed-off-by: Pu Wen Reviewed-by: Calvin Walton Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index f57c4023231e..6cec6aa01241 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4031,7 +4031,7 @@ void rapl_probe_amd(unsigned int family, unsigned int model) rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f)); rapl_power_units = ldexp(1.0, -(msr & 0xf)); - tdp = get_tdp_amd(model); + tdp = get_tdp_amd(family); rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; if (!quiet) -- cgit v1.2.1 From c1c10cc77883932abdb7b103687ecbb01e80bef9 Mon Sep 17 00:00:00 2001 From: Pu Wen Date: Sat, 31 Aug 2019 10:20:31 +0800 Subject: tools/power turbostat: Add support for Hygon Fam 18h (Dhyana) RAPL Commit 9392bd98bba760be96ee ("tools/power turbostat: Add support for AMD Fam 17h (Zen) RAPL") and the commit 3316f99a9f1b68c578c5 ("tools/power turbostat: Also read package power on AMD F17h (Zen)") added AMD Fam 17h RAPL support. Hygon Family 18h (Dhyana) indicates RAPL support in bit 14 of CPUID 0x80000007 EDX, and has MSRs RAPL_PWR_UNIT/CORE_ENERGY_STAT/PKG_ENERGY_STAT. So add Hygon Dhyana Family 18h support for RAPL. Already tested on Hygon multi-node systems and it shows correct per-core energy usage and the total package power.
Signed-off-by: Pu Wen Reviewed-by: Calvin Walton Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 6cec6aa01241..e8b6c608d6d1 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -59,6 +59,7 @@ unsigned int do_irtl_hsw; unsigned int units = 1000000; /* MHz etc */ unsigned int genuine_intel; unsigned int authentic_amd; +unsigned int hygon_genuine; unsigned int max_level, max_extended_level; unsigned int has_invariant_tsc; unsigned int do_nhm_platform_info; @@ -1730,7 +1731,7 @@ void get_apic_id(struct thread_data *t) if (!DO_BIC(BIC_X2APIC)) return; - if (authentic_amd) { + if (authentic_amd || hygon_genuine) { unsigned int topology_extensions; if (max_extended_level < 0x8000001e) @@ -3831,6 +3832,7 @@ double get_tdp_amd(unsigned int family) { switch (family) { case 0x17: + case 0x18: default: /* This is the max stock TDP of HEDT/Server Fam17h chips */ return 250.0; @@ -4011,6 +4013,7 @@ void rapl_probe_amd(unsigned int family, unsigned int model) switch (family) { case 0x17: /* Zen, Zen+ */ + case 0x18: /* Hygon Dhyana */ do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY; if (rapl_joules) { BIC_PRESENT(BIC_Pkg_J); @@ -4047,7 +4050,7 @@ void rapl_probe(unsigned int family, unsigned int model) { if (genuine_intel) rapl_probe_intel(family, model); - if (authentic_amd) + if (authentic_amd || hygon_genuine) rapl_probe_amd(family, model); } @@ -4632,6 +4635,8 @@ void process_cpuid() genuine_intel = 1; else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) authentic_amd = 1; + else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e) + hygon_genuine = 1; if (!quiet) fprintf(outf, "CPUID(0): %.4s%.4s%.4s ", -- cgit v1.2.1 From 9eb4b5180d33c827f16829644ae0cd7382ecdb82 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 31 Aug 2019 
14:40:39 -0400 Subject: tools/power turbostat: update version number Today is 19.08.31, at least in some parts of the world. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index e8b6c608d6d1..b2a86438f074 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5306,7 +5306,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 19.03.20" + fprintf(outf, "turbostat version 19.08.31" " - Len Brown \n"); } -- cgit v1.2.1 From 5f81d5455589df2f580e634c2e9da55b80e63c30 Mon Sep 17 00:00:00 2001 From: George McCollister Date: Thu, 29 Aug 2019 09:14:41 -0500 Subject: net: dsa: microchip: fill regmap_config name Use the register value width as the regmap_config name to prevent the following error when the second and third regmap_configs are initialized. "debugfs: Directory '${bus-id}' with parent 'regmap' already present!" Signed-off-by: George McCollister Reviewed-by: Marek Vasut Signed-off-by: David S. 
Miller --- drivers/net/dsa/microchip/ksz_common.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index ee7096d8af07..72ec250b9540 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -128,6 +128,7 @@ static inline void ksz_pwrite32(struct ksz_device *dev, int port, int offset, #define KSZ_REGMAP_ENTRY(width, swp, regbits, regpad, regalign) \ { \ + .name = #width, \ .val_bits = (width), \ .reg_stride = (width) / 8, \ .reg_bits = (regbits) + (regalign), \ -- cgit v1.2.1 From efb55222d31c8c2af8b4ba42bb56b0b0beedf98e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 30 Aug 2019 04:07:21 +0300 Subject: taprio: Fix kernel panic in taprio_destroy taprio_init may fail earlier than this line: list_add(&q->taprio_list, &taprio_list); i.e. due to the net device not being multi queue. Attempting to remove q from the global taprio_list when it is not part of it will result in a kernel panic. Fix it by matching list_add and list_del better to one another in the order of operations. This way we can keep the deletion unconditional and with lower complexity - O(1). Cc: Leandro Dorileo Fixes: 7b9eba7ba0c1 ("net/sched: taprio: fix picos_per_byte miscalculation") Signed-off-by: Vladimir Oltean Acked-by: Vinicius Costa Gomes Signed-off-by: David S. 
Miller --- net/sched/sch_taprio.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index e25d414ae12f..8c85f61e98dd 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1249,6 +1249,10 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, */ q->clockid = -1; + spin_lock(&taprio_list_lock); + list_add(&q->taprio_list, &taprio_list); + spin_unlock(&taprio_list_lock); + if (sch->parent != TC_H_ROOT) return -EOPNOTSUPP; @@ -1266,10 +1270,6 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - spin_lock(&taprio_list_lock); - list_add(&q->taprio_list, &taprio_list); - spin_unlock(&taprio_list_lock); - for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *dev_queue; struct Qdisc *qdisc; -- cgit v1.2.1 From f04b514c0ce2f20aad757ebfb04e07b1a12d2b58 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 30 Aug 2019 04:07:22 +0300 Subject: taprio: Set default link speed to 10 Mbps in taprio_set_picos_per_byte The taprio budget needs to be adapted at runtime according to interface link speed. But that handling is problematic. For one thing, installing a qdisc on an interface that doesn't have carrier is not illegal. But taprio prints the following stack trace: [ 31.851373] ------------[ cut here ]------------ [ 31.856024] WARNING: CPU: 1 PID: 207 at net/sched/sch_taprio.c:481 taprio_dequeue+0x1a8/0x2d4 [ 31.864566] taprio: dequeue() called with unknown picos per byte. 
[ 31.864570] Modules linked in: [ 31.873701] CPU: 1 PID: 207 Comm: tc Not tainted 5.3.0-rc5-01199-g8838fe023cd6 #1689 [ 31.881398] Hardware name: Freescale LS1021A [ 31.885661] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 31.893368] [] (show_stack) from [] (dump_stack+0xb4/0xc8) [ 31.900555] [] (dump_stack) from [] (__warn+0xe0/0xf8) [ 31.907395] [] (__warn) from [] (warn_slowpath_fmt+0x48/0x6c) [ 31.914841] [] (warn_slowpath_fmt) from [] (taprio_dequeue+0x1a8/0x2d4) [ 31.923150] [] (taprio_dequeue) from [] (__qdisc_run+0x90/0x61c) [ 31.930856] [] (__qdisc_run) from [] (net_tx_action+0x12c/0x2bc) [ 31.938560] [] (net_tx_action) from [] (__do_softirq+0x130/0x3c8) [ 31.946350] [] (__do_softirq) from [] (irq_exit+0xbc/0xd8) [ 31.953536] [] (irq_exit) from [] (__handle_domain_irq+0x60/0xb4) [ 31.961328] [] (__handle_domain_irq) from [] (gic_handle_irq+0x58/0x9c) [ 31.969638] [] (gic_handle_irq) from [] (__irq_svc+0x6c/0x90) [ 31.977076] Exception stack(0xe8167b20 to 0xe8167b68) [ 31.982100] 7b20: e9d4bd80 00000cc0 000000cf 00000000 e9d4bd80 c1f38958 00000cc0 c1f38960 [ 31.990234] 7b40: 00000001 000000cf 00000004 e9dc0800 00000000 e8167b70 c0f478ec c0f46d94 [ 31.998363] 7b60: 60070013 ffffffff [ 32.001833] [] (__irq_svc) from [] (netlink_trim+0x18/0xd8) [ 32.009104] [] (netlink_trim) from [] (netlink_broadcast_filtered+0x34/0x414) [ 32.017930] [] (netlink_broadcast_filtered) from [] (netlink_broadcast+0x20/0x28) [ 32.027102] [] (netlink_broadcast) from [] (rtnetlink_send+0x34/0x88) [ 32.035238] [] (rtnetlink_send) from [] (notify_and_destroy+0x2c/0x44) [ 32.043461] [] (notify_and_destroy) from [] (qdisc_graft+0x398/0x470) [ 32.051595] [] (qdisc_graft) from [] (tc_modify_qdisc+0x3a4/0x724) [ 32.059470] [] (tc_modify_qdisc) from [] (rtnetlink_rcv_msg+0x260/0x2ec) [ 32.067864] [] (rtnetlink_rcv_msg) from [] (netlink_rcv_skb+0xb8/0x110) [ 32.076172] [] (netlink_rcv_skb) from [] (netlink_unicast+0x1b4/0x22c) [ 32.084392] [] (netlink_unicast) from [] 
(netlink_sendmsg+0x33c/0x380) [ 32.092614] [] (netlink_sendmsg) from [] (sock_sendmsg+0x14/0x24) [ 32.100403] [] (sock_sendmsg) from [] (___sys_sendmsg+0x214/0x228) [ 32.108279] [] (___sys_sendmsg) from [] (__sys_sendmsg+0x50/0x8c) [ 32.116068] [] (__sys_sendmsg) from [] (ret_fast_syscall+0x0/0x54) [ 32.123938] Exception stack(0xe8167fa8 to 0xe8167ff0) [ 32.128960] 7fa0: b6fa68c8 000000f8 00000003 bea142d0 00000000 00000000 [ 32.137093] 7fc0: b6fa68c8 000000f8 0052154c 00000128 5d6468a2 00000000 00000028 00558c9c [ 32.145224] 7fe0: 00000070 bea14278 00530d64 b6e17e64 [ 32.150659] ---[ end trace 2139c9827c3e5177 ]--- This happens because the qdisc ->dequeue callback gets called. Which again is not illegal, the qdisc will dequeue even when the interface is up but doesn't have carrier (and hence SPEED_UNKNOWN), and the frames will be dropped further down the stack in dev_direct_xmit(). And, at the end of the day, for what? For calculating the initial budget of an interface which is non-operational at the moment and where frames will get dropped anyway. So if we can't figure out the link speed, default to SPEED_10 and move along. We can also remove the runtime check now. Cc: Leandro Dorileo Fixes: 7b9eba7ba0c1 ("net/sched: taprio: fix picos_per_byte miscalculation") Acked-by: Vinicius Costa Gomes Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- net/sched/sch_taprio.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 8c85f61e98dd..8d8bc2ec5cd6 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -477,11 +477,6 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch) u32 gate_mask; int i; - if (atomic64_read(&q->picos_per_byte) == -1) { - WARN_ONCE(1, "taprio: dequeue() called with unknown picos per byte."); - return NULL; - } - rcu_read_lock(); entry = rcu_dereference(q->current_entry); /* if there's no entry, it means that the schedule didn't @@ -958,12 +953,20 @@ static void taprio_set_picos_per_byte(struct net_device *dev, struct taprio_sched *q) { struct ethtool_link_ksettings ecmd; - int picos_per_byte = -1; + int speed = SPEED_10; + int picos_per_byte; + int err; + + err = __ethtool_get_link_ksettings(dev, &ecmd); + if (err < 0) + goto skip; + + if (ecmd.base.speed != SPEED_UNKNOWN) + speed = ecmd.base.speed; - if (!__ethtool_get_link_ksettings(dev, &ecmd) && - ecmd.base.speed != SPEED_UNKNOWN) - picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8, - ecmd.base.speed * 1000 * 1000); +skip: + picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8, + speed * 1000 * 1000); atomic64_set(&q->picos_per_byte, picos_per_byte); netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n", -- cgit v1.2.1 From 1c6c09a0ae62fa3ea8f8ead2ac3920e6fff2de64 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 30 Aug 2019 04:07:23 +0300 Subject: net/sched: cbs: Set default link speed to 10 Mbps in cbs_set_port_rate The discussion to be made is absolutely the same as in the case of previous patch ("taprio: Set default link speed to 10 Mbps in taprio_set_picos_per_byte"). Nothing is lost when setting a default. 
Cc: Leandro Dorileo Fixes: e0a7683d30e9 ("net/sched: cbs: fix port_rate miscalculation") Acked-by: Vinicius Costa Gomes Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/sched/sch_cbs.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 732e109c3055..810645b5c086 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -181,11 +181,6 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch) s64 credits; int len; - if (atomic64_read(&q->port_rate) == -1) { - WARN_ONCE(1, "cbs: dequeue() called with unknown port rate."); - return NULL; - } - if (q->credits < 0) { credits = timediff_to_credits(now - q->last, q->idleslope); @@ -303,11 +298,19 @@ static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q, static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q) { struct ethtool_link_ksettings ecmd; + int speed = SPEED_10; int port_rate = -1; + int err; + + err = __ethtool_get_link_ksettings(dev, &ecmd); + if (err < 0) + goto skip; + + if (ecmd.base.speed != SPEED_UNKNOWN) + speed = ecmd.base.speed; - if (!__ethtool_get_link_ksettings(dev, &ecmd) && - ecmd.base.speed != SPEED_UNKNOWN) - port_rate = ecmd.base.speed * 1000 * BYTES_PER_KBIT; +skip: + port_rate = speed * 1000 * BYTES_PER_KBIT; atomic64_set(&q->port_rate, port_rate); netdev_dbg(dev, "cbs: set %s's port_rate to: %lld, linkspeed: %d\n", -- cgit v1.2.1 From 6fdc060d7476ef73c8029b652d252c1a7b4de948 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Fri, 30 Aug 2019 12:08:30 +0000 Subject: net: aquantia: fix removal of vlan 0 Due to absence of checking against the rx flow rule when vlan 0 is being removed, the other rule could be removed instead of the rule with vlan 0 Fixes: 7975d2aff5afb ("net: aquantia: add support of rx-vlan-filter offload") Signed-off-by: Dmitry Bogdanov Signed-off-by: Igor Russkikh Signed-off-by: David S. 
Miller --- drivers/net/ethernet/aquantia/atlantic/aq_filters.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c index 440690b18734..b13704544a23 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c @@ -431,7 +431,8 @@ int aq_del_fvlan_by_vlan(struct aq_nic_s *aq_nic, u16 vlan_id) if (be16_to_cpu(rule->aq_fsp.h_ext.vlan_tci) == vlan_id) break; } - if (rule && be16_to_cpu(rule->aq_fsp.h_ext.vlan_tci) == vlan_id) { + if (rule && rule->type == aq_rx_filter_vlan && + be16_to_cpu(rule->aq_fsp.h_ext.vlan_tci) == vlan_id) { struct ethtool_rxnfc cmd; cmd.fs.location = rule->aq_fsp.location; -- cgit v1.2.1 From 392349f60110dc2c3daf86464fd926afc53d6143 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Fri, 30 Aug 2019 12:08:33 +0000 Subject: net: aquantia: fix limit of vlan filters Fix a limit condition of vlans on the interface before setting vlan promiscuous mode Fixes: 48dd73d08d4dd ("net: aquantia: fix vlans not working over bridged network") Signed-off-by: Dmitry Bogdanov Signed-off-by: Igor Russkikh Signed-off-by: David S. 
Miller --- drivers/net/ethernet/aquantia/atlantic/aq_filters.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c index b13704544a23..aee827f07c16 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c @@ -844,7 +844,7 @@ int aq_filters_vlans_update(struct aq_nic_s *aq_nic) return err; if (aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) { - if (hweight < AQ_VLAN_MAX_FILTERS && hweight > 0) { + if (hweight <= AQ_VLAN_MAX_FILTERS && hweight > 0) { err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, !(aq_nic->packet_filter & IFF_PROMISC)); aq_nic->aq_nic_cfg.is_vlan_force_promisc = false; -- cgit v1.2.1 From c2ef057ee775e229d3138add59f937d93a3a59d8 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Fri, 30 Aug 2019 12:08:35 +0000 Subject: net: aquantia: reapply vlan filters on up In case of device reconfiguration the driver may reset the device invisible for other modules, vlan module in particular. So vlans will not be removed&created and vlan filters will not be configured in the device. The patch reapplies the vlan filters at device start. Fixes: 7975d2aff5afb ("net: aquantia: add support of rx-vlan-filter offload") Signed-off-by: Dmitry Bogdanov Signed-off-by: Igor Russkikh Signed-off-by: David S. 
Miller --- drivers/net/ethernet/aquantia/atlantic/aq_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index 100722ad5c2d..b4a0fb281e69 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -61,6 +61,10 @@ static int aq_ndev_open(struct net_device *ndev) if (err < 0) goto err_exit; + err = aq_filters_vlans_update(aq_nic); + if (err < 0) + goto err_exit; + err = aq_nic_start(aq_nic); if (err < 0) goto err_exit; -- cgit v1.2.1 From 5c47e3ba6fe52465603cf9d816b3371e6881d649 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Fri, 30 Aug 2019 12:08:36 +0000 Subject: net: aquantia: linkstate irq should be oneshot Declaring threaded irq handler should also indicate the irq is oneshot. It is oneshot indeed, because HW implements irq automasking on trigger. Not declaring this causes some kernel configurations to fail on interface up, because request_threaded_irq returned an err code. The issue was originally hidden on normal x86_64 configuration with latest kernel, because depending on interrupt controller, irq driver added ONESHOT flag on its own. Issue was observed on older kernels (4.14) where no such logic exists. Fixes: 4c83f170b3ac ("net: aquantia: link status irq handling") Signed-off-by: Igor Russkikh Reported-by: Michael Symolkin Signed-off-by: David S. 
Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index e1392766e21e..8f66e7817811 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -393,7 +393,7 @@ int aq_nic_start(struct aq_nic_s *self) self->aq_nic_cfg.link_irq_vec); err = request_threaded_irq(irqvec, NULL, aq_linkstate_threaded_isr, - IRQF_SHARED, + IRQF_SHARED | IRQF_ONESHOT, self->ndev->name, self); if (err < 0) goto err_exit; -- cgit v1.2.1 From be6cef69ba570ebb327eba1ef6438f7af49aaf86 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Fri, 30 Aug 2019 12:08:38 +0000 Subject: net: aquantia: fix out of memory condition on rx side On embedded environments with hard memory limits it is a normal, although rare, case that an skb can't be allocated on the rx path under high traffic. In such OOM cases napi_complete_done() was not called. So the napi object was left in an invalid state, as if it were still "scheduled". The kernel does not re-schedule the poll of that napi object. Consequently, the kernel cannot remove that object and the system hangs on `ifconfig down` waiting for a poll. We are fixing this by gracefully closing the napi poll routine with a correct invocation of napi_complete_done. This was reproduced by artificially failing the allocation of skb to simulate an "out of memory" error case and checking that traffic does not get stuck. Fixes: 970a2e9864b0 ("net: ethernet: aquantia: Vector operations") Signed-off-by: Igor Russkikh Signed-off-by: Dmitry Bogdanov Signed-off-by: David S.
Miller --- drivers/net/ethernet/aquantia/atlantic/aq_vec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c index 715685aa48c3..28892b8acd0e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c @@ -86,6 +86,7 @@ static int aq_vec_poll(struct napi_struct *napi, int budget) } } +err_exit: if (!was_tx_cleaned) work_done = budget; @@ -95,7 +96,7 @@ static int aq_vec_poll(struct napi_struct *napi, int budget) 1U << self->aq_ring_param.vec_idx); } } -err_exit: + return work_done; } -- cgit v1.2.1 From de5eb9e00eba00783f49da37377bd2a93d38fe42 Mon Sep 17 00:00:00 2001 From: Razvan Stefanescu Date: Fri, 30 Aug 2019 10:52:01 +0300 Subject: dt-bindings: net: dsa: document additional Microchip KSZ8563 switch It is a 3-Port 10/100 Ethernet Switch with 1588v2 PTP. Signed-off-by: Razvan Stefanescu Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/ksz.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/dsa/ksz.txt b/Documentation/devicetree/bindings/net/dsa/ksz.txt index 4ac21cef370e..113e7ac79aad 100644 --- a/Documentation/devicetree/bindings/net/dsa/ksz.txt +++ b/Documentation/devicetree/bindings/net/dsa/ksz.txt @@ -12,6 +12,7 @@ Required properties: - "microchip,ksz8565" - "microchip,ksz9893" - "microchip,ksz9563" + - "microchip,ksz8563" Optional properties: -- cgit v1.2.1 From d9033ae95cf445150fcc5856ccf024f41f0bd0b9 Mon Sep 17 00:00:00 2001 From: Razvan Stefanescu Date: Fri, 30 Aug 2019 10:52:02 +0300 Subject: net: dsa: microchip: add KSZ8563 compatibility string It is a 3-Port 10/100 Ethernet Switch with 1588v2 PTP. Signed-off-by: Razvan Stefanescu Signed-off-by: David S. 
Miller --- drivers/net/dsa/microchip/ksz9477_spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index 5a9e27b337a8..098b01e4ed1a 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -81,6 +81,7 @@ static const struct of_device_id ksz9477_dt_ids[] = { { .compatible = "microchip,ksz9897" }, { .compatible = "microchip,ksz9893" }, { .compatible = "microchip,ksz9563" }, + { .compatible = "microchip,ksz8563" }, {}, }; MODULE_DEVICE_TABLE(of, ksz9477_dt_ids); -- cgit v1.2.1 From 02a3f0d5a70a865d55c4b7cb2e327cb30491f7fd Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 30 Aug 2019 18:51:47 +0200 Subject: tc-testing: don't hardcode 'ip' in nsPlugin.py the following tdc test fails on Fedora: # ./tdc.py -e 2638 -- ns/SubPlugin.__init__ Test 2638: Add matchall and try to get it -----> prepare stage *** Could not execute: "$TC qdisc add dev $DEV1 clsact" -----> prepare stage *** Error message: "/bin/sh: ip: command not found" returncode 127; expected [0] -----> prepare stage *** Aborting test run. Let nsPlugin.py use the 'IP' variable introduced with commit 92c1a19e2fb9 ("tc-tests: added path to ip command in tdc"), so that the path to 'ip' is correctly resolved to the value we have in tdc_config.py. # ./tdc.py -e 2638 -- ns/SubPlugin.__init__ Test 2638: Add matchall and try to get it All test results: 1..1 ok 1 2638 - Add matchall and try to get it Fixes: 489ce2f42514 ("tc-testing: Restore original behaviour for namespaces in tdc") Reported-by: Hangbin Liu Signed-off-by: Davide Caratti Acked-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- .../selftests/tc-testing/plugin-lib/nsPlugin.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index affa7f2d9670..9539cffa9e5e 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -64,7 +64,7 @@ class SubPlugin(TdcPlugin): cmdlist.insert(0, self.args.NAMES['NS']) cmdlist.insert(0, 'exec') cmdlist.insert(0, 'netns') - cmdlist.insert(0, 'ip') + cmdlist.insert(0, self.args.NAMES['IP']) else: pass @@ -78,16 +78,16 @@ class SubPlugin(TdcPlugin): return command def _ports_create(self): - cmd = 'ip link add $DEV0 type veth peer name $DEV1' + cmd = '$IP link add $DEV0 type veth peer name $DEV1' self._exec_cmd('pre', cmd) - cmd = 'ip link set $DEV0 up' + cmd = '$IP link set $DEV0 up' self._exec_cmd('pre', cmd) if not self.args.namespace: - cmd = 'ip link set $DEV1 up' + cmd = '$IP link set $DEV1 up' self._exec_cmd('pre', cmd) def _ports_destroy(self): - cmd = 'ip link del $DEV0' + cmd = '$IP link del $DEV0' self._exec_cmd('post', cmd) def _ns_create(self): @@ -97,16 +97,16 @@ class SubPlugin(TdcPlugin): ''' self._ports_create() if self.args.namespace: - cmd = 'ip netns add {}'.format(self.args.NAMES['NS']) + cmd = '$IP netns add {}'.format(self.args.NAMES['NS']) self._exec_cmd('pre', cmd) - cmd = 'ip link set $DEV1 netns {}'.format(self.args.NAMES['NS']) + cmd = '$IP link set $DEV1 netns {}'.format(self.args.NAMES['NS']) self._exec_cmd('pre', cmd) - cmd = 'ip -n {} link set $DEV1 up'.format(self.args.NAMES['NS']) + cmd = '$IP -n {} link set $DEV1 up'.format(self.args.NAMES['NS']) self._exec_cmd('pre', cmd) if self.args.device: - cmd = 'ip link set $DEV2 netns {}'.format(self.args.NAMES['NS']) + cmd = '$IP link set $DEV2 netns {}'.format(self.args.NAMES['NS']) self._exec_cmd('pre', cmd) - cmd = 'ip -n {} link set $DEV2 
up'.format(self.args.NAMES['NS']) + cmd = '$IP -n {} link set $DEV2 up'.format(self.args.NAMES['NS']) self._exec_cmd('pre', cmd) def _ns_destroy(self): @@ -115,7 +115,7 @@ class SubPlugin(TdcPlugin): devices as well) ''' if self.args.namespace: - cmd = 'ip netns delete {}'.format(self.args.NAMES['NS']) + cmd = '$IP netns delete {}'.format(self.args.NAMES['NS']) self._exec_cmd('post', cmd) def _exec_cmd(self, stage, command): -- cgit v1.2.1 From dd1bf47a84fe64edf50f6a83fdc164d0da6fa021 Mon Sep 17 00:00:00 2001 From: "Ryan M. Collins" Date: Fri, 30 Aug 2019 14:49:55 -0400 Subject: net: bcmgenet: use ethtool_op_get_ts_info() This change enables the use of SW timestamping on the Raspberry Pi 4. bcmgenet's transmit function bcmgenet_xmit() implements software timestamping. However the SOF_TIMESTAMPING_TX_SOFTWARE capability was missing and only SOF_TIMESTAMPING_RX_SOFTWARE was announced. By using ethtool_ops bcmgenet_ethtool_ops() as get_ts_info(), the SOF_TIMESTAMPING_TX_SOFTWARE capability is announced. Similar to commit a8f5cb9e7991 ("smsc95xx: use ethtool_op_get_ts_info()") Signed-off-by: Ryan M. Collins Acked-by: Florian Fainelli Acked-by: Doug Berger Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index d3a0b614dbfa..b22196880d6d 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1124,6 +1124,7 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = { .set_coalesce = bcmgenet_set_coalesce, .get_link_ksettings = bcmgenet_get_link_ksettings, .set_link_ksettings = bcmgenet_set_link_ksettings, + .get_ts_info = ethtool_op_get_ts_info, }; /* Power down the unimac, based on mode. 
*/ -- cgit v1.2.1 From dd7078f05e1b774a9e8c9f117101d97e4ccd0691 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 30 Aug 2019 22:23:12 +0200 Subject: enetc: Add missing call to 'pci_free_irq_vectors()' in probe and remove functions Calls to 'pci_free_irq_vectors()' are missing both in the error handling path of the probe function, and in the remove function. Add them. Fixes: 19971f5ea0ab ("enetc: add PTP clock driver") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc_ptp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c index 2fd2586e42bf..bc594892507a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c @@ -82,7 +82,7 @@ static int enetc_ptp_probe(struct pci_dev *pdev, n = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX); if (n != 1) { err = -EPERM; - goto err_irq; + goto err_irq_vectors; } ptp_qoriq->irq = pci_irq_vector(pdev, 0); @@ -107,6 +107,8 @@ static int enetc_ptp_probe(struct pci_dev *pdev, err_no_clock: free_irq(ptp_qoriq->irq, ptp_qoriq); err_irq: + pci_free_irq_vectors(pdev); +err_irq_vectors: iounmap(base); err_ioremap: kfree(ptp_qoriq); @@ -125,6 +127,7 @@ static void enetc_ptp_remove(struct pci_dev *pdev) enetc_phc_index = -1; ptp_qoriq_free(ptp_qoriq); + pci_free_irq_vectors(pdev); kfree(ptp_qoriq); pci_release_mem_regions(pdev); -- cgit v1.2.1 From e1e54ec7fb55501c33b117c111cb0a045b8eded2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 31 Aug 2019 09:17:51 +0200 Subject: net: seeq: Fix the function used to release some memory in an error handling path In commit 99cd149efe82 ("sgiseeq: replace use of dma_cache_wback_inv"), a call to 'get_zeroed_page()' has been turned into a call to 'dma_alloc_coherent()'.
Only the remove function has been updated to turn the corresponding 'free_page()' into 'dma_free_attrs()'. The error handling path of the probe function has not been updated. Fix it now. Rename the corresponding label to something more in line. Fixes: 99cd149efe82 ("sgiseeq: replace use of dma_cache_wback_inv") Signed-off-by: Christophe JAILLET Reviewed-by: Thomas Bogendoerfer Signed-off-by: David S. Miller --- drivers/net/ethernet/seeq/sgiseeq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c index 7a5e6c5abb57..276c7cae7cee 100644 --- a/drivers/net/ethernet/seeq/sgiseeq.c +++ b/drivers/net/ethernet/seeq/sgiseeq.c @@ -794,15 +794,16 @@ static int sgiseeq_probe(struct platform_device *pdev) printk(KERN_ERR "Sgiseeq: Cannot register net device, " "aborting.\n"); err = -ENODEV; - goto err_out_free_page; + goto err_out_free_attrs; } printk(KERN_INFO "%s: %s %pM\n", dev->name, sgiseeqstr, dev->dev_addr); return 0; -err_out_free_page: - free_page((unsigned long) sp->srings); +err_out_free_attrs: + dma_free_attrs(&pdev->dev, sizeof(*sp->srings), sp->srings, + sp->srings_dma, DMA_ATTR_NON_CONSISTENT); err_out_free_dev: free_netdev(dev); -- cgit v1.2.1 From 089cf7f6ecb266b6a4164919a2e69bd2f938374a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 2 Sep 2019 09:57:40 -0700 Subject: Linux 5.3-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f125625efd60..0cbe8717bdb3 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 3 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Bobtail Squid # *DOCUMENTATION* -- cgit v1.2.1