diff options
author | Arnd Bergmann <arnd@arndb.de> | 2021-05-08 00:07:47 +0200 |
---|---|---|
committer | Arnd Bergmann <arnd@arndb.de> | 2021-05-10 17:50:04 +0200 |
commit | bf067edf5d2f5b2948ee7197974a719aae3e526c (patch) | |
tree | 5c2f652b41034e10d3c86f4e71b65c43291bf2e1 /arch/openrisc/include/asm | |
parent | 637be9183e0475c430fc77162c222bcaab887989 (diff) | |
download | linux-stable-bf067edf5d2f5b2948ee7197974a719aae3e526c.tar.gz |
openrisc: always use unaligned-struct header
openrisc is the only architecture using the linux/unaligned/*memmove
infrastructure. There is a comment saying that this version is more
efficient, but this was added in 2011 before the openrisc gcc port
was merged upstream.
I checked a couple of files to see what the actual difference is with
the mainline gcc (9.4 and 11.1), and found that the generic header
seems to produce better code now, regardless of the gcc version.
Specifically, the be_memmove leads to allocating a stack slot and
copying the data one byte at a time, then reading the whole word
from the stack:
00000000 <test_get_unaligned_memmove>:
0: 9c 21 ff f4 l.addi r1,r1,-12
4: d4 01 10 04 l.sw 4(r1),r2
8: 8e 63 00 00 l.lbz r19,0(r3)
c: 9c 41 00 0c l.addi r2,r1,12
10: 8e 23 00 01 l.lbz r17,1(r3)
14: db e2 9f f4 l.sb -12(r2),r19
18: db e2 8f f5 l.sb -11(r2),r17
1c: 8e 63 00 02 l.lbz r19,2(r3)
20: 8e 23 00 03 l.lbz r17,3(r3)
24: d4 01 48 08 l.sw 8(r1),r9
28: db e2 9f f6 l.sb -10(r2),r19
2c: db e2 8f f7 l.sb -9(r2),r17
30: 85 62 ff f4 l.lwz r11,-12(r2)
34: 85 21 00 08 l.lwz r9,8(r1)
38: 84 41 00 04 l.lwz r2,4(r1)
3c: 44 00 48 00 l.jr r9
40: 9c 21 00 0c l.addi r1,r1,12
while the be_struct version reads each byte into a register
and does a shift to the right position:
00000000 <test_get_unaligned_struct>:
0: 9c 21 ff f8 l.addi r1,r1,-8
4: 8e 63 00 00 l.lbz r19,0(r3)
8: aa 20 00 18 l.ori r17,r0,0x18
c: e2 73 88 08 l.sll r19,r19,r17
10: 8d 63 00 01 l.lbz r11,1(r3)
14: aa 20 00 10 l.ori r17,r0,0x10
18: e1 6b 88 08 l.sll r11,r11,r17
1c: e1 6b 98 04 l.or r11,r11,r19
20: 8e 23 00 02 l.lbz r17,2(r3)
24: aa 60 00 08 l.ori r19,r0,0x8
28: e2 31 98 08 l.sll r17,r17,r19
2c: d4 01 10 00 l.sw 0(r1),r2
30: d4 01 48 04 l.sw 4(r1),r9
34: 9c 41 00 08 l.addi r2,r1,8
38: e2 31 58 04 l.or r17,r17,r11
3c: 8d 63 00 03 l.lbz r11,3(r3)
40: e1 6b 88 04 l.or r11,r11,r17
44: 84 41 00 00 l.lwz r2,0(r1)
48: 85 21 00 04 l.lwz r9,4(r1)
4c: 44 00 48 00 l.jr r9
50: 9c 21 00 08 l.addi r1,r1,8
According to Stafford Horne, the new version should in fact perform
better.
In the trivial example, the struct version is a few instructions longer,
but building a whole kernel shows an overall reduction in code size,
presumably because it now has to manage fewer stack slots:
text data bss dec hex filename
4792010 181480 82324 5055814 4d2546 vmlinux-unaligned-memmove
4790642 181480 82324 5054446 4d1fee vmlinux-unaligned-struct
Remove the memmove version completely and let openrisc use the same
code as everyone else, as a simplification.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Stafford Horne <shorne@gmail.com>
Diffstat (limited to 'arch/openrisc/include/asm')
-rw-r--r-- | arch/openrisc/include/asm/unaligned.h | 47 |
1 files changed, 0 insertions, 47 deletions
diff --git a/arch/openrisc/include/asm/unaligned.h b/arch/openrisc/include/asm/unaligned.h deleted file mode 100644 index 14353f2101f2..000000000000 --- a/arch/openrisc/include/asm/unaligned.h +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * OpenRISC Linux - * - * Linux architectural port borrowing liberally from similar works of - * others. All original copyrights apply as per the original source - * declaration. - * - * OpenRISC implementation: - * Copyright (C) 2003 Matjaz Breskvar <phoenix@bsemi.com> - * Copyright (C) 2010-2011 Jonas Bonn <jonas@southpole.se> - * et al. - */ - -#ifndef __ASM_OPENRISC_UNALIGNED_H -#define __ASM_OPENRISC_UNALIGNED_H - -/* - * This is copied from the generic implementation and the C-struct - * variant replaced with the memmove variant. The GCC compiler - * for the OR32 arch optimizes too aggressively for the C-struct - * variant to work, so use the memmove variant instead. - * - * It may be worth considering implementing the unaligned access - * exception handler and allowing unaligned accesses (access_ok.h)... - * not sure if it would be much of a performance win without further - * investigation. - */ -#include <asm/byteorder.h> - -#if defined(__LITTLE_ENDIAN) -# include <linux/unaligned/le_memmove.h> -# include <linux/unaligned/be_byteshift.h> -# include <linux/unaligned/generic.h> -# define get_unaligned __get_unaligned_le -# define put_unaligned __put_unaligned_le -#elif defined(__BIG_ENDIAN) -# include <linux/unaligned/be_memmove.h> -# include <linux/unaligned/le_byteshift.h> -# include <linux/unaligned/generic.h> -# define get_unaligned __get_unaligned_be -# define put_unaligned __put_unaligned_be -#else -# error need to define endianess -#endif - -#endif /* __ASM_OPENRISC_UNALIGNED_H */ |