diff options
Diffstat (limited to 'fedora/glibc-x86_64-memcpy.patch')
-rw-r--r-- | fedora/glibc-x86_64-memcpy.patch | 1439 |
1 files changed, 0 insertions, 1439 deletions
diff --git a/fedora/glibc-x86_64-memcpy.patch b/fedora/glibc-x86_64-memcpy.patch deleted file mode 100644 index 3888134df8..0000000000 --- a/fedora/glibc-x86_64-memcpy.patch +++ /dev/null @@ -1,1439 +0,0 @@ -2007-05-21 Ulrich Drepper <drepper@redhat.com> - - * sysdeps/x86_64/cacheinfo.c (init_cacheinfo): Pass correct value - as second parameter to handle_intel. - - * sysdeps/unix/sysv/linux/x86_64/sysconf.c: Move cache information - handling to ... - * sysdeps/x86_64/cacheinfo.c: ... here. New file. - * sysdeps/x86_64/Makefile [subdir=string] (sysdep_routines): Add - cacheinfo. - * sysdeps/x86_64/memcpy.S: Complete rewrite. - * sysdeps/x86_64/mempcpy.S: Adjust appropriately. - Patch by Evandro Menezes <evandro.menezes@amd.com>. - ---- libc/sysdeps/x86_64/Makefile 16 Aug 2004 06:46:14 -0000 1.4 -+++ libc/sysdeps/x86_64/Makefile 21 May 2007 19:20:45 -0000 1.5 -@@ -9,3 +9,7 @@ endif - ifeq ($(subdir),gmon) - sysdep_routines += _mcount - endif -+ -+ifeq ($(subdir),string) -+sysdep_routines += cacheinfo -+endif ---- libc/sysdeps/x86_64/cacheinfo.c 1 Jan 1970 00:00:00 -0000 -+++ libc/sysdeps/x86_64/cacheinfo.c 21 May 2007 22:37:45 -0000 1.2 -@@ -0,0 +1,451 @@ -+/* x86_64 cache info. -+ Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library; if not, write to the Free -+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ 02111-1307 USA. -+*/ -+ -+#include <assert.h> -+#include <stdbool.h> -+#include <stdlib.h> -+#include <unistd.h> -+ -+static const struct intel_02_cache_info -+{ -+ unsigned int idx; -+ int name; -+ long int size; -+ long int assoc; -+ long int linesize; -+} intel_02_known [] = -+ { -+ { 0x06, _SC_LEVEL1_ICACHE_SIZE, 8192, 4, 32 }, -+ { 0x08, _SC_LEVEL1_ICACHE_SIZE, 16384, 4, 32 }, -+ { 0x0a, _SC_LEVEL1_DCACHE_SIZE, 8192, 2, 32 }, -+ { 0x0c, _SC_LEVEL1_DCACHE_SIZE, 16384, 4, 32 }, -+ { 0x22, _SC_LEVEL3_CACHE_SIZE, 524288, 4, 64 }, -+ { 0x23, _SC_LEVEL3_CACHE_SIZE, 1048576, 8, 64 }, -+ { 0x25, _SC_LEVEL3_CACHE_SIZE, 2097152, 8, 64 }, -+ { 0x29, _SC_LEVEL3_CACHE_SIZE, 4194304, 8, 64 }, -+ { 0x2c, _SC_LEVEL1_DCACHE_SIZE, 32768, 8, 64 }, -+ { 0x30, _SC_LEVEL1_ICACHE_SIZE, 32768, 8, 64 }, -+ { 0x39, _SC_LEVEL2_CACHE_SIZE, 131072, 4, 64 }, -+ { 0x3a, _SC_LEVEL2_CACHE_SIZE, 196608, 6, 64 }, -+ { 0x3b, _SC_LEVEL2_CACHE_SIZE, 131072, 2, 64 }, -+ { 0x3c, _SC_LEVEL2_CACHE_SIZE, 262144, 4, 64 }, -+ { 0x3d, _SC_LEVEL2_CACHE_SIZE, 393216, 6, 64 }, -+ { 0x3e, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 64 }, -+ { 0x41, _SC_LEVEL2_CACHE_SIZE, 131072, 4, 32 }, -+ { 0x42, _SC_LEVEL2_CACHE_SIZE, 262144, 4, 32 }, -+ { 0x43, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 32 }, -+ { 0x44, _SC_LEVEL2_CACHE_SIZE, 1048576, 4, 32 }, -+ { 0x45, _SC_LEVEL2_CACHE_SIZE, 2097152, 4, 32 }, -+ { 0x46, _SC_LEVEL3_CACHE_SIZE, 4194304, 4, 64 }, -+ { 0x47, _SC_LEVEL3_CACHE_SIZE, 8388608, 8, 64 }, -+ { 0x49, _SC_LEVEL2_CACHE_SIZE, 4194304, 16, 64 }, -+ { 0x4a, _SC_LEVEL3_CACHE_SIZE, 6291456, 12, 64 }, -+ { 0x4b, _SC_LEVEL3_CACHE_SIZE, 8388608, 16, 64 }, -+ { 0x4c, _SC_LEVEL3_CACHE_SIZE, 12582912, 12, 64 }, -+ { 0x4d, _SC_LEVEL3_CACHE_SIZE, 16777216, 16, 64 }, -+ { 0x60, _SC_LEVEL1_DCACHE_SIZE, 16384, 8, 64 }, -+ { 0x66, _SC_LEVEL1_DCACHE_SIZE, 8192, 4, 64 }, -+ { 0x67, _SC_LEVEL1_DCACHE_SIZE, 16384, 4, 64 }, -+ { 0x68, _SC_LEVEL1_DCACHE_SIZE, 32768, 4, 64 }, -+ { 0x78, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 64 }, -+ { 0x79, _SC_LEVEL2_CACHE_SIZE, 131072, 8, 64 }, -+ { 0x7a, _SC_LEVEL2_CACHE_SIZE, 262144, 8, 64 }, -+ { 0x7b, _SC_LEVEL2_CACHE_SIZE, 524288, 8, 64 }, -+ { 0x7c, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 64 }, -+ { 0x7d, _SC_LEVEL2_CACHE_SIZE, 2097152, 8, 64 }, -+ { 0x7f, _SC_LEVEL2_CACHE_SIZE, 524288, 2, 64 }, -+ { 0x82, _SC_LEVEL2_CACHE_SIZE, 262144, 8, 32 }, -+ { 0x83, _SC_LEVEL2_CACHE_SIZE, 524288, 8, 32 }, -+ { 0x84, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 32 }, -+ { 0x85, _SC_LEVEL2_CACHE_SIZE, 2097152, 8, 32 }, -+ { 0x86, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 64 }, -+ { 0x87, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 64 }, -+ }; -+ -+#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0])) -+ -+static int -+intel_02_known_compare (const void *p1, const void *p2) -+{ -+ const struct intel_02_cache_info *i1; -+ const struct intel_02_cache_info *i2; -+ -+ i1 = (const struct intel_02_cache_info *) p1; -+ i2 = (const struct intel_02_cache_info *) p2; -+ -+ if (i1->idx == i2->idx) -+ return 0; -+ -+ return i1->idx < i2->idx ? -1 : 1; -+} -+ -+ -+static long int -+__attribute__ ((noinline)) -+intel_check_word (int name, unsigned int value, bool *has_level_2, -+ bool *no_level_2_or_3) -+{ -+ if ((value & 0x80000000) != 0) -+ /* The register value is reserved. */ -+ return 0; -+ -+ /* Fold the name. The _SC_ constants are always in the order SIZE, -+ ASSOC, LINESIZE. */ -+ int folded_name = (_SC_LEVEL1_ICACHE_SIZE -+ + ((name - _SC_LEVEL1_ICACHE_SIZE) / 3) * 3); -+ -+ while (value != 0) -+ { -+ unsigned int byte = value & 0xff; -+ -+ if (byte == 0x40) -+ { -+ *no_level_2_or_3 = true; -+ -+ if (folded_name == _SC_LEVEL3_CACHE_SIZE) -+ /* No need to look further. */ -+ break; -+ } -+ else -+ { -+ if (byte == 0x49 && folded_name == _SC_LEVEL3_CACHE_SIZE) -+ { -+ /* Intel reused this value. For family 15, model 6 it -+ specifies the 3rd level cache. Otherwise the 2nd -+ level cache. */ -+ unsigned int eax; -+ unsigned int ebx; -+ unsigned int ecx; -+ unsigned int edx; -+ asm volatile ("cpuid" -+ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) -+ : "0" (1)); -+ -+ unsigned int family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf); -+ unsigned int model = ((((eax >>16) & 0xf) << 4) -+ + ((eax >> 4) & 0xf)); -+ if (family == 15 && model == 6) -+ { -+ /* The level 3 cache is encoded for this model like -+ the level 2 cache is for other models. Pretend -+ the caller asked for the level 2 cache. */ -+ name = (_SC_LEVEL2_CACHE_SIZE -+ + (name - _SC_LEVEL3_CACHE_SIZE)); -+ folded_name = _SC_LEVEL3_CACHE_SIZE; -+ } -+ } -+ -+ struct intel_02_cache_info *found; -+ struct intel_02_cache_info search; -+ -+ search.idx = byte; -+ found = bsearch (&search, intel_02_known, nintel_02_known, -+ sizeof (intel_02_known[0]), intel_02_known_compare); -+ if (found != NULL) -+ { -+ if (found->name == folded_name) -+ { -+ unsigned int offset = name - folded_name; -+ -+ if (offset == 0) -+ /* Cache size. */ -+ return found->size; -+ if (offset == 1) -+ return found->assoc; -+ -+ assert (offset == 2); -+ return found->linesize; -+ } -+ -+ if (found->name == _SC_LEVEL2_CACHE_SIZE) -+ *has_level_2 = true; -+ } -+ } -+ -+ /* Next byte for the next round. */ -+ value >>= 8; -+ } -+ -+ /* Nothing found. */ -+ return 0; -+} -+ -+ -+static long int __attribute__ ((noinline)) -+handle_intel (int name, unsigned int maxidx) -+{ -+ assert (maxidx >= 2); -+ -+ /* OK, we can use the CPUID instruction to get all info about the -+ caches. */ -+ unsigned int cnt = 0; -+ unsigned int max = 1; -+ long int result = 0; -+ bool no_level_2_or_3 = false; -+ bool has_level_2 = false; -+ -+ while (cnt++ < max) -+ { -+ unsigned int eax; -+ unsigned int ebx; -+ unsigned int ecx; -+ unsigned int edx; -+ asm volatile ("cpuid" -+ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) -+ : "0" (2)); -+ -+ /* The low byte of EAX in the first round contain the number of -+ rounds we have to make. At least one, the one we are already -+ doing. */ -+ if (cnt == 1) -+ { -+ max = eax & 0xff; -+ eax &= 0xffffff00; -+ } -+ -+ /* Process the individual registers' value. */ -+ result = intel_check_word (name, eax, &has_level_2, &no_level_2_or_3); -+ if (result != 0) -+ return result; -+ -+ result = intel_check_word (name, ebx, &has_level_2, &no_level_2_or_3); -+ if (result != 0) -+ return result; -+ -+ result = intel_check_word (name, ecx, &has_level_2, &no_level_2_or_3); -+ if (result != 0) -+ return result; -+ -+ result = intel_check_word (name, edx, &has_level_2, &no_level_2_or_3); -+ if (result != 0) -+ return result; -+ } -+ -+ if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE -+ && no_level_2_or_3) -+ return -1; -+ -+ return 0; -+} -+ -+ -+static long int __attribute__ ((noinline)) -+handle_amd (int name) -+{ -+ unsigned int eax; -+ unsigned int ebx; -+ unsigned int ecx; -+ unsigned int edx; -+ asm volatile ("cpuid" -+ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) -+ : "0" (0x80000000)); -+ -+ if (name >= _SC_LEVEL3_CACHE_SIZE) -+ return 0; -+ -+ unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE); -+ if (eax < fn) -+ return 0; -+ -+ asm volatile ("cpuid" -+ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) -+ : "0" (fn)); -+ -+ if (name < _SC_LEVEL1_DCACHE_SIZE) -+ { -+ name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE; -+ ecx = edx; -+ } -+ -+ switch (name) -+ { -+ case _SC_LEVEL1_DCACHE_SIZE: -+ return (ecx >> 14) & 0x3fc00; -+ case _SC_LEVEL1_DCACHE_ASSOC: -+ ecx >>= 16; -+ if ((ecx & 0xff) == 0xff) -+ /* Fully associative. */ -+ return (ecx << 2) & 0x3fc00; -+ return ecx & 0xff; -+ case _SC_LEVEL1_DCACHE_LINESIZE: -+ return ecx & 0xff; -+ case _SC_LEVEL2_CACHE_SIZE: -+ return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00; -+ case _SC_LEVEL2_CACHE_ASSOC: -+ ecx >>= 12; -+ switch (ecx & 0xf) -+ { -+ case 0: -+ case 1: -+ case 2: -+ case 4: -+ return ecx & 0xf; -+ case 6: -+ return 8; -+ case 8: -+ return 16; -+ case 0xf: -+ return (ecx << 6) & 0x3fffc00; -+ default: -+ return 0; -+ } -+ case _SC_LEVEL2_CACHE_LINESIZE: -+ return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff; -+ default: -+ assert (! "cannot happen"); -+ } -+ return -1; -+} -+ -+ -+/* Get the value of the system variable NAME. */ -+long int -+attribute_hidden -+__cache_sysconf (int name) -+{ -+ /* Find out what brand of processor. */ -+ unsigned int eax; -+ unsigned int ebx; -+ unsigned int ecx; -+ unsigned int edx; -+ asm volatile ("cpuid" -+ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) -+ : "0" (0)); -+ -+ /* This spells out "GenuineIntel". */ -+ if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) -+ return handle_intel (name, eax); -+ -+ /* This spells out "AuthenticAMD". */ -+ if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) -+ return handle_amd (name); -+ -+ // XXX Fill in more vendors. -+ -+ /* CPU not known, we have no information. */ -+ return 0; -+} -+ -+ -+/* Half the core cache size for use in memory and string routines, typically -+ L1 size. */ -+long int __x86_64_core_cache_size_half attribute_hidden = 32 * 1024 / 2; -+/* Shared cache size for use in memory and string routines, typically -+ L2 or L3 size. */ -+long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2; -+/* PREFETCHW support flag for use in memory and string routines. */ -+int __x86_64_prefetchw attribute_hidden; -+ -+ -+static void -+__attribute__((constructor)) -+init_cacheinfo (void) -+{ -+ /* Find out what brand of processor. */ -+ unsigned int eax; -+ unsigned int ebx; -+ unsigned int ecx; -+ unsigned int edx; -+ int max_cpuid; -+ int max_cpuid_ex; -+ long int core = -1; -+ long int shared = -1; -+ unsigned int level; -+ unsigned int threads = 0; -+ -+ asm volatile ("cpuid" -+ : "=a" (max_cpuid), "=b" (ebx), "=c" (ecx), "=d" (edx) -+ : "0" (0)); -+ -+ /* This spells out "GenuineIntel". */ -+ if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) -+ { -+ core = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid); -+ -+ /* Try L3 first. */ -+ level = 3; -+ shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid); -+ -+ if (shared <= 0) -+ { -+ /* Try L2 otherwise. */ -+ level = 2; -+ shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid); -+ } -+ -+ /* Figure out the number of logical threads that share the -+ highest cache level. */ -+ if (max_cpuid >= 4) -+ { -+ int i = 0; -+ -+ /* Query until desired cache level is enumerated. */ -+ do -+ { -+ asm volatile ("cpuid" -+ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) -+ : "0" (4), "2" (i++)); -+ } -+ while (((eax >> 5) & 0x7) != level); -+ -+ threads = ((eax >> 14) & 0x3ff) + 1; -+ } -+ else -+ { -+ /* Assume that all logical threads share the highest cache level. */ -+ asm volatile ("cpuid" -+ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) -+ : "0" (1)); -+ -+ threads = (ebx >> 16) & 0xff; -+ } -+ -+ /* Cap usage of highest cache level to the number of supported -+ threads. */ -+ if (shared > 0 && threads > 0) -+ shared /= threads; -+ } -+ /* This spells out "AuthenticAMD". */ -+ else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) -+ { -+ core = handle_amd (_SC_LEVEL1_DCACHE_SIZE); -+ shared = handle_amd (_SC_LEVEL2_CACHE_SIZE); -+ -+ asm volatile ("cpuid" -+ : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx), "=d" (edx) -+ : "0" (0x80000000)); -+ -+ if (max_cpuid_ex >= 0x80000001) -+ { -+ asm volatile ("cpuid" -+ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) -+ : "0" (0x80000001)); -+ /* PREFETCHW || 3DNow! */ -+ if ((ecx & 0x100) || (edx & 0x80000000)) -+ __x86_64_prefetchw = -1; -+ } -+ } -+ -+ if (core > 0) -+ __x86_64_core_cache_size_half = core / 2; -+ -+ if (shared > 0) -+ __x86_64_shared_cache_size_half = shared / 2; -+} ---- libc/sysdeps/x86_64/memcpy.S 18 Oct 2004 04:17:08 -0000 1.5 -+++ libc/sysdeps/x86_64/memcpy.S 21 May 2007 19:21:01 -0000 1.6 -@@ -1,7 +1,10 @@ --/* Highly optimized version for x86-64. -- Copyright (C) 1997, 2000, 2002, 2003, 2004 Free Software Foundation, Inc. -+/* -+ Optimized memcpy for x86-64. -+ -+ Copyright (C) 2007 Free Software Foundation, Inc. -+ Contributed by Evandro Menezes <evandro.menezes@amd.com>, 2007. -+ - This file is part of the GNU C Library. -- Based on i586 version contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public -@@ -16,86 +19,556 @@ - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -- 02111-1307 USA. */ -+ 02111-1307 USA. -+*/ - - #include <sysdep.h> - #include "asm-syntax.h" --#include "bp-sym.h" --#include "bp-asm.h" - --/* BEWARE: `#ifdef memcpy' means that memcpy is redefined as `mempcpy', -- and the return value is the byte after the last one copied in -- the destination. */ --#define MEMPCPY_P (defined memcpy) -+/* Stack slots in the red-zone. */ -+ -+#ifdef USE_AS_MEMPCPY -+# define RETVAL (0) -+#else -+# define RETVAL (-8) -+#endif -+#define SAVE0 (RETVAL - 8) -+#define SAVE1 (SAVE0 - 8) -+#define SAVE2 (SAVE1 - 8) -+#define SAVE3 (SAVE2 - 8) - - .text -+ - #if defined PIC && !defined NOT_IN_libc - ENTRY (__memcpy_chk) -+ - cmpq %rdx, %rcx - jb HIDDEN_JUMPTARGET (__chk_fail) -+ - END (__memcpy_chk) - #endif --ENTRY (BP_SYM (memcpy)) -- /* Cutoff for the big loop is a size of 32 bytes since otherwise -- the loop will never be entered. */ -+ -+ENTRY(memcpy) /* (void *, const void*, size_t) */ -+ -+/* Handle tiny blocks. */ -+ -+L(1try): /* up to 32B */ - cmpq $32, %rdx -- movq %rdx, %rcx --#if !MEMPCPY_P -- movq %rdi, %r10 /* Save value. */ -+#ifndef USE_AS_MEMPCPY -+ movq %rdi, %rax /* save return value */ - #endif -+ jae L(1after) - -- /* We need this in any case. */ -- cld -+L(1): /* 1-byte once */ -+ testb $1, %dl -+ jz L(1a) - -- jbe 1f -+ movzbl (%rsi), %ecx -+ movb %cl, (%rdi) - -- /* Align destination. */ -- movq %rdi, %rax -- negq %rax -- andq $7, %rax -- subq %rax, %rcx -- xchgq %rax, %rcx -+ incq %rsi -+ incq %rdi -+ -+ .p2align 4,, 4 -+ -+L(1a): /* 2-byte once */ -+ testb $2, %dl -+ jz L(1b) -+ -+ movzwl (%rsi), %ecx -+ movw %cx, (%rdi) - -- rep; movsb -+ addq $2, %rsi -+ addq $2, %rdi - -- movq %rax, %rcx -- subq $32, %rcx -- js 2f -+ .p2align 4,, 4 -+ -+L(1b): /* 4-byte once */ -+ testb $4, %dl -+ jz L(1c) -+ -+ movl (%rsi), %ecx -+ movl %ecx, (%rdi) -+ -+ addq $4, %rsi -+ addq $4, %rdi -+ -+ .p2align 4,, 4 -+ -+L(1c): /* 8-byte once */ -+ testb $8, %dl -+ jz L(1d) -+ -+ movq (%rsi), %rcx -+ movq %rcx, (%rdi) -+ -+ addq $8, %rsi -+ addq $8, %rdi -+ -+ .p2align 4,, 4 -+ -+L(1d): /* 16-byte loop */ -+ andl $0xf0, %edx -+ jz L(exit) - - .p2align 4 --3: - -- /* Now correct the loop counter. Please note that in the following -- code the flags are not changed anymore. */ -- subq $32, %rcx -+L(1loop): -+ movq (%rsi), %rcx -+ movq 8 (%rsi), %r8 -+ movq %rcx, (%rdi) -+ movq %r8, 8 (%rdi) -+ -+ subl $16, %edx -+ -+ leaq 16 (%rsi), %rsi -+ leaq 16 (%rdi), %rdi -+ -+ jnz L(1loop) -+ -+ .p2align 4,, 4 -+ -+L(exit): /* exit */ -+#ifdef USE_AS_MEMPCPY -+ movq %rdi, %rax /* return value */ -+#else -+ rep -+#endif -+ retq -+ -+ .p2align 4 -+ -+L(1after): -+#ifndef USE_AS_MEMPCPY -+ movq %rax, RETVAL (%rsp) /* save return value */ -+#endif -+ -+/* Align to the natural word size. */ -+ -+L(aligntry): -+ movl %esi, %ecx /* align by destination */ -+ -+ andl $7, %ecx -+ jz L(alignafter) /* already aligned */ -+ -+L(align): /* align */ -+ leaq -8 (%rcx, %rdx), %rdx /* calculate remaining bytes */ -+ subl $8, %ecx -+ -+ .p2align 4 -+ -+L(alignloop): /* 1-byte alignment loop */ -+ movzbl (%rsi), %eax -+ movb %al, (%rdi) -+ -+ incl %ecx -+ -+ leaq 1 (%rsi), %rsi -+ leaq 1 (%rdi), %rdi -+ -+ jnz L(alignloop) -+ -+ .p2align 4 -+ -+L(alignafter): -+ -+/* Loop to handle mid-sized blocks. */ -+ -+L(32try): /* up to 1KB */ -+ cmpq $1024, %rdx -+ ja L(32after) -+ -+L(32): /* 32-byte loop */ -+ movl %edx, %ecx -+ shrl $5, %ecx -+ jz L(32skip) -+ -+ .p2align 4 -+ -+L(32loop): -+ decl %ecx - - movq (%rsi), %rax -- movq 8(%rsi), %rdx -- movq 16(%rsi), %r8 -- movq 24(%rsi), %r9 -+ movq 8 (%rsi), %r8 -+ movq 16 (%rsi), %r9 -+ movq 24 (%rsi), %r10 -+ - movq %rax, (%rdi) -- movq %rdx, 8(%rdi) -- movq %r8, 16(%rdi) -- movq %r9, 24(%rdi) -+ movq %r8, 8 (%rdi) -+ movq %r9, 16 (%rdi) -+ movq %r10, 24 (%rdi) - - leaq 32(%rsi), %rsi - leaq 32(%rdi), %rdi - -- jns 3b -+ jz L(32skip) /* help out smaller blocks */ -+ -+ decl %ecx -+ -+ movq (%rsi), %rax -+ movq 8 (%rsi), %r8 -+ movq 16 (%rsi), %r9 -+ movq 24 (%rsi), %r10 -+ -+ movq %rax, (%rdi) -+ movq %r8, 8 (%rdi) -+ movq %r9, 16 (%rdi) -+ movq %r10, 24 (%rdi) -+ -+ leaq 32 (%rsi), %rsi -+ leaq 32 (%rdi), %rdi -+ -+ jnz L(32loop) -+ -+ .p2align 4 - -- /* Correct extra loop counter modification. */ --2: addq $32, %rcx --1: rep; movsb -+L(32skip): -+ andl $31, %edx /* check for left overs */ -+#ifdef USE_AS_MEMPCPY -+ jnz L(1) - --#if MEMPCPY_P -- movq %rdi, %rax /* Set return value. */ -+ movq %rdi, %rax - #else -- movq %r10, %rax /* Set return value. */ -+ movq RETVAL (%rsp), %rax -+ jnz L(1) - -+ rep -+#endif -+ retq /* exit */ -+ -+ .p2align 4 -+ -+L(32after): -+ -+/* -+ In order to minimize code-size in RTLD, algorithms specific for -+ larger blocks are excluded when building for RTLD. -+*/ -+ -+/* Handle large blocks smaller than 1/2 L1. */ -+ -+L(fasttry): /* first 1/2 L1 */ -+#ifndef NOT_IN_libc /* only up to this algorithm outside of libc.so */ -+ movq __x86_64_core_cache_size_half (%rip), %r11 -+ cmpq %rdx, %r11 /* calculate the smaller of */ -+ cmovaq %rdx, %r11 /* remaining bytes and 1/2 L1 */ -+#endif -+ -+L(fast): /* good ol' MOVS */ -+#ifndef NOT_IN_libc -+ movq %r11, %rcx -+ andq $-8, %r11 -+#else -+ movq %rdx, %rcx -+#endif -+ shrq $3, %rcx -+ jz L(fastskip) -+ -+ rep -+ movsq -+ -+ .p2align 4,, 4 -+ -+L(fastskip): -+#ifndef NOT_IN_libc -+ subq %r11, %rdx /* check for more */ -+ testq $-8, %rdx -+ jnz L(fastafter) - #endif -- ret - --END (BP_SYM (memcpy)) --#if !MEMPCPY_P -+ andl $7, %edx /* check for left overs */ -+#ifdef USE_AS_MEMPCPY -+ jnz L(1) -+ -+ movq %rdi, %rax -+#else -+ movq RETVAL (%rsp), %rax -+ jnz L(1) -+ -+ rep -+#endif -+ retq /* exit */ -+ -+#ifndef NOT_IN_libc /* none of the algorithms below for RTLD */ -+ -+ .p2align 4 -+ -+L(fastafter): -+ -+/* Handle large blocks smaller than 1/2 L2. */ -+ -+L(pretry): /* first 1/2 L2 */ -+ movq __x86_64_shared_cache_size_half (%rip), %r8 -+ cmpq %rdx, %r8 /* calculate the lesser of */ -+ cmovaq %rdx, %r8 /* remaining bytes and 1/2 L2 */ -+ -+L(pre): /* 64-byte with prefetching */ -+ movq %r8, %rcx -+ andq $-64, %r8 -+ shrq $6, %rcx -+ jz L(preskip) -+ -+ movq %r14, SAVE0 (%rsp) -+ cfi_rel_offset (%r14, SAVE0) -+ movq %r13, SAVE1 (%rsp) -+ cfi_rel_offset (%r13, SAVE1) -+ movq %r12, SAVE2 (%rsp) -+ cfi_rel_offset (%r12, SAVE2) -+ movq %rbx, SAVE3 (%rsp) -+ cfi_rel_offset (%rbx, SAVE3) -+ -+ cmpl $0, __x86_64_prefetchw (%rip) -+ jz L(preloop) /* check if PREFETCHW OK */ -+ -+ .p2align 4 -+ -+/* ... when PREFETCHW is available (less cache-probe traffic in MP systems). */ -+ -+L(prewloop): /* cache-line in state M */ -+ decq %rcx -+ -+ movq (%rsi), %rax -+ movq 8 (%rsi), %rbx -+ movq 16 (%rsi), %r9 -+ movq 24 (%rsi), %r10 -+ movq 32 (%rsi), %r11 -+ movq 40 (%rsi), %r12 -+ movq 48 (%rsi), %r13 -+ movq 56 (%rsi), %r14 -+ -+ prefetcht0 0 + 896 (%rsi) -+ prefetcht0 64 + 896 (%rsi) -+ -+ movq %rax, (%rdi) -+ movq %rbx, 8 (%rdi) -+ movq %r9, 16 (%rdi) -+ movq %r10, 24 (%rdi) -+ movq %r11, 32 (%rdi) -+ movq %r12, 40 (%rdi) -+ movq %r13, 48 (%rdi) -+ movq %r14, 56 (%rdi) -+ -+ leaq 64 (%rsi), %rsi -+ leaq 64 (%rdi), %rdi -+ -+ jz L(prebail) -+ -+ decq %rcx -+ -+ movq (%rsi), %rax -+ movq 8 (%rsi), %rbx -+ movq 16 (%rsi), %r9 -+ movq 24 (%rsi), %r10 -+ movq 32 (%rsi), %r11 -+ movq 40 (%rsi), %r12 -+ movq 48 (%rsi), %r13 -+ movq 56 (%rsi), %r14 -+ -+ movq %rax, (%rdi) -+ movq %rbx, 8 (%rdi) -+ movq %r9, 16 (%rdi) -+ movq %r10, 24 (%rdi) -+ movq %r11, 32 (%rdi) -+ movq %r12, 40 (%rdi) -+ movq %r13, 48 (%rdi) -+ movq %r14, 56 (%rdi) -+ -+ prefetchw 896 - 64 (%rdi) -+ prefetchw 896 - 0 (%rdi) -+ -+ leaq 64 (%rsi), %rsi -+ leaq 64 (%rdi), %rdi -+ -+ jnz L(prewloop) -+ jmp L(prebail) -+ -+ .p2align 4 -+ -+/* ... when PREFETCHW is not available. */ -+ -+L(preloop): /* cache-line in state E */ -+ decq %rcx -+ -+ movq (%rsi), %rax -+ movq 8 (%rsi), %rbx -+ movq 16 (%rsi), %r9 -+ movq 24 (%rsi), %r10 -+ movq 32 (%rsi), %r11 -+ movq 40 (%rsi), %r12 -+ movq 48 (%rsi), %r13 -+ movq 56 (%rsi), %r14 -+ -+ prefetcht0 896 + 0 (%rsi) -+ prefetcht0 896 + 64 (%rsi) -+ -+ movq %rax, (%rdi) -+ movq %rbx, 8 (%rdi) -+ movq %r9, 16 (%rdi) -+ movq %r10, 24 (%rdi) -+ movq %r11, 32 (%rdi) -+ movq %r12, 40 (%rdi) -+ movq %r13, 48 (%rdi) -+ movq %r14, 56 (%rdi) -+ -+ leaq 64 (%rsi), %rsi -+ leaq 64 (%rdi), %rdi -+ -+ jz L(prebail) -+ -+ decq %rcx -+ -+ movq (%rsi), %rax -+ movq 8 (%rsi), %rbx -+ movq 16 (%rsi), %r9 -+ movq 24 (%rsi), %r10 -+ movq 32 (%rsi), %r11 -+ movq 40 (%rsi), %r12 -+ movq 48 (%rsi), %r13 -+ movq 56 (%rsi), %r14 -+ -+ prefetcht0 896 - 64 (%rdi) -+ prefetcht0 896 - 0 (%rdi) -+ -+ movq %rax, (%rdi) -+ movq %rbx, 8 (%rdi) -+ movq %r9, 16 (%rdi) -+ movq %r10, 24 (%rdi) -+ movq %r11, 32 (%rdi) -+ movq %r12, 40 (%rdi) -+ movq %r13, 48 (%rdi) -+ movq %r14, 56 (%rdi) -+ -+ leaq 64 (%rsi), %rsi -+ leaq 64 (%rdi), %rdi -+ -+ jnz L(preloop) -+ -+L(prebail): -+ movq SAVE3 (%rsp), %rbx -+ cfi_restore (%rbx) -+ movq SAVE2 (%rsp), %r12 -+ cfi_restore (%r12) -+ movq SAVE1 (%rsp), %r13 -+ cfi_restore (%r13) -+ movq SAVE0 (%rsp), %r14 -+ cfi_restore (%r14) -+ -+/* .p2align 4 */ -+ -+L(preskip): -+ subq %r8, %rdx /* check for more */ -+ testq $-64, %rdx -+ jnz L(preafter) -+ -+ andl $63, %edx /* check for left overs */ -+#ifdef USE_AS_MEMPCPY -+ jnz L(1) -+ -+ movq %rdi, %rax -+#else -+ movq RETVAL (%rsp), %rax -+ jnz L(1) -+ -+ rep -+#endif -+ retq /* exit */ -+ -+ .p2align 4 -+ -+L(preafter): -+ -+/* Loop to handle huge blocks. */ -+ -+L(NTtry): -+ -+L(NT): /* non-temporal 128-byte */ -+ movq %rdx, %rcx -+ shrq $7, %rcx -+ jz L(NTskip) -+ -+ movq %r14, SAVE0 (%rsp) -+ cfi_rel_offset (%r14, SAVE0) -+ movq %r13, SAVE1 (%rsp) -+ cfi_rel_offset (%r13, SAVE1) -+ movq %r12, SAVE2 (%rsp) -+ cfi_rel_offset (%r12, SAVE2) -+ -+ .p2align 4 -+ -+L(NTloop): -+ prefetchnta 768 (%rsi) -+ prefetchnta 832 (%rsi) -+ -+ decq %rcx -+ -+ movq (%rsi), %rax -+ movq 8 (%rsi), %r8 -+ movq 16 (%rsi), %r9 -+ movq 24 (%rsi), %r10 -+ movq 32 (%rsi), %r11 -+ movq 40 (%rsi), %r12 -+ movq 48 (%rsi), %r13 -+ movq 56 (%rsi), %r14 -+ -+ movntiq %rax, (%rdi) -+ movntiq %r8, 8 (%rdi) -+ movntiq %r9, 16 (%rdi) -+ movntiq %r10, 24 (%rdi) -+ movntiq %r11, 32 (%rdi) -+ movntiq %r12, 40 (%rdi) -+ movntiq %r13, 48 (%rdi) -+ movntiq %r14, 56 (%rdi) -+ -+ movq 64 (%rsi), %rax -+ movq 72 (%rsi), %r8 -+ movq 80 (%rsi), %r9 -+ movq 88 (%rsi), %r10 -+ movq 96 (%rsi), %r11 -+ movq 104 (%rsi), %r12 -+ movq 112 (%rsi), %r13 -+ movq 120 (%rsi), %r14 -+ -+ movntiq %rax, 64 (%rdi) -+ movntiq %r8, 72 (%rdi) -+ movntiq %r9, 80 (%rdi) -+ movntiq %r10, 88 (%rdi) -+ movntiq %r11, 96 (%rdi) -+ movntiq %r12, 104 (%rdi) -+ movntiq %r13, 112 (%rdi) -+ movntiq %r14, 120 (%rdi) -+ -+ leaq 128 (%rsi), %rsi -+ leaq 128 (%rdi), %rdi -+ -+ jnz L(NTloop) -+ -+ sfence /* serialize memory stores */ -+ -+ movq SAVE2 (%rsp), %r12 -+ cfi_restore (%r12) -+ movq SAVE1 (%rsp), %r13 -+ cfi_restore (%r13) -+ movq SAVE0 (%rsp), %r14 -+ cfi_restore (%r14) -+ -+L(NTskip): -+ andl $127, %edx /* check for left overs */ -+#ifdef USE_AS_MEMPCPY -+ jnz L(1) -+ -+ movq %rdi, %rax -+#else -+ movq RETVAL (%rsp), %rax -+ jnz L(1) -+ -+ rep -+#endif -+ retq /* exit */ -+ -+#endif /* !NOT_IN_libc */ -+ -+END(memcpy) -+ -+#ifndef USE_AS_MEMPCPY - libc_hidden_builtin_def (memcpy) - #endif ---- libc/sysdeps/x86_64/mempcpy.S 18 Oct 2004 04:17:08 -0000 1.3 -+++ libc/sysdeps/x86_64/mempcpy.S 21 May 2007 19:21:16 -0000 1.4 -@@ -1,3 +1,4 @@ -+#define USE_AS_MEMPCPY - #define memcpy __mempcpy - #define __memcpy_chk __mempcpy_chk - #include <sysdeps/x86_64/memcpy.S> ---- libc/sysdeps/unix/sysv/linux/x86_64/sysconf.c 10 Nov 2006 07:31:55 -0000 1.7 -+++ libc/sysdeps/unix/sysv/linux/x86_64/sysconf.c 21 May 2007 19:18:37 -0000 1.8 -@@ -24,328 +24,17 @@ - - - static long int linux_sysconf (int name); -- -- --static const struct intel_02_cache_info --{ -- unsigned int idx; -- int name; -- long int size; -- long int assoc; -- long int linesize; --} intel_02_known[] = -- { -- { 0x06, _SC_LEVEL1_ICACHE_SIZE, 8192, 4, 32 }, -- { 0x08, _SC_LEVEL1_ICACHE_SIZE, 16384, 4, 32 }, -- { 0x0a, _SC_LEVEL1_DCACHE_SIZE, 8192, 2, 32 }, -- { 0x0c, _SC_LEVEL1_DCACHE_SIZE, 16384, 4, 32 }, -- { 0x22, _SC_LEVEL3_CACHE_SIZE, 524288, 4, 64 }, -- { 0x23, _SC_LEVEL3_CACHE_SIZE, 1048576, 8, 64 }, -- { 0x25, _SC_LEVEL3_CACHE_SIZE, 2097152, 8, 64 }, -- { 0x29, _SC_LEVEL3_CACHE_SIZE, 4194304, 8, 64 }, -- { 0x2c, _SC_LEVEL1_DCACHE_SIZE, 32768, 8, 64 }, -- { 0x30, _SC_LEVEL1_ICACHE_SIZE, 32768, 8, 64 }, -- { 0x39, _SC_LEVEL2_CACHE_SIZE, 131072, 4, 64 }, -- { 0x3a, _SC_LEVEL2_CACHE_SIZE, 196608, 6, 64 }, -- { 0x3b, _SC_LEVEL2_CACHE_SIZE, 131072, 2, 64 }, -- { 0x3c, _SC_LEVEL2_CACHE_SIZE, 262144, 4, 64 }, -- { 0x3d, _SC_LEVEL2_CACHE_SIZE, 393216, 6, 64 }, -- { 0x3e, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 64 }, -- { 0x41, _SC_LEVEL2_CACHE_SIZE, 131072, 4, 32 }, -- { 0x42, _SC_LEVEL2_CACHE_SIZE, 262144, 4, 32 }, -- { 0x43, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 32 }, -- { 0x44, _SC_LEVEL2_CACHE_SIZE, 1048576, 4, 32 }, -- { 0x45, _SC_LEVEL2_CACHE_SIZE, 2097152, 4, 32 }, -- { 0x46, _SC_LEVEL3_CACHE_SIZE, 4194304, 4, 64 }, -- { 0x47, _SC_LEVEL3_CACHE_SIZE, 8388608, 8, 64 }, -- { 0x49, _SC_LEVEL2_CACHE_SIZE, 4194304, 16, 64 }, -- { 0x4a, _SC_LEVEL3_CACHE_SIZE, 6291456, 12, 64 }, -- { 0x4b, _SC_LEVEL3_CACHE_SIZE, 8388608, 16, 64 }, -- { 0x4c, _SC_LEVEL3_CACHE_SIZE, 12582912, 12, 64 }, -- { 0x4d, _SC_LEVEL3_CACHE_SIZE, 16777216, 16, 64 }, -- { 0x60, _SC_LEVEL1_DCACHE_SIZE, 16384, 8, 64 }, -- { 0x66, _SC_LEVEL1_DCACHE_SIZE, 8192, 4, 64 }, -- { 0x67, _SC_LEVEL1_DCACHE_SIZE, 16384, 4, 64 }, -- { 0x68, _SC_LEVEL1_DCACHE_SIZE, 32768, 4, 64 }, -- { 0x78, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 64 }, -- { 0x79, _SC_LEVEL2_CACHE_SIZE, 131072, 8, 64 }, -- { 0x7a, _SC_LEVEL2_CACHE_SIZE, 262144, 8, 64 }, -- { 0x7b, _SC_LEVEL2_CACHE_SIZE, 524288, 8, 64 }, -- { 0x7c, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 64 }, -- { 0x7d, _SC_LEVEL2_CACHE_SIZE, 2097152, 8, 64 }, -- { 0x7f, _SC_LEVEL2_CACHE_SIZE, 524288, 2, 64 }, -- { 0x82, _SC_LEVEL2_CACHE_SIZE, 262144, 8, 32 }, -- { 0x83, _SC_LEVEL2_CACHE_SIZE, 524288, 8, 32 }, -- { 0x84, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 32 }, -- { 0x85, _SC_LEVEL2_CACHE_SIZE, 2097152, 8, 32 }, -- { 0x86, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 64 }, -- { 0x87, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 64 }, -- }; --#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known[0])) -- -- --static int --intel_02_known_compare (const void *p1, const void *p2) --{ -- const struct intel_02_cache_info *i1; -- const struct intel_02_cache_info *i2; -- -- i1 = (const struct intel_02_cache_info *) p1; -- i2 = (const struct intel_02_cache_info *) p2; -- -- if (i1->idx == i2->idx) -- return 0; -- -- return i1->idx < i2->idx ? -1 : 1; --} -- -- --static long int --__attribute__ ((noinline)) --intel_check_word (int name, unsigned int value, bool *has_level_2, -- bool *no_level_2_or_3) --{ -- if ((value & 0x80000000) != 0) -- /* The register value is reserved. */ -- return 0; -- -- /* Fold the name. The _SC_ constants are always in the order SIZE, -- ASSOC, LINESIZE. */ -- int folded_name = (_SC_LEVEL1_ICACHE_SIZE -- + ((name - _SC_LEVEL1_ICACHE_SIZE) / 3) * 3); -- -- while (value != 0) -- { -- unsigned int byte = value & 0xff; -- -- if (byte == 0x40) -- { -- *no_level_2_or_3 = true; -- -- if (folded_name == _SC_LEVEL3_CACHE_SIZE) -- /* No need to look further. */ -- break; -- } -- else -- { -- if (byte == 0x49 && folded_name == _SC_LEVEL3_CACHE_SIZE) -- { -- /* Intel reused this value. For family 15, model 6 it -- specifies the 3rd level cache. Otherwise the 2nd -- level cache. */ -- unsigned int eax; -- unsigned int ebx; -- unsigned int ecx; -- unsigned int edx; -- asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" -- : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx) -- : "0" (1)); -- -- unsigned int family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf); -- unsigned int model = ((((eax >>16) & 0xf) << 4) -- + ((eax >> 4) & 0xf)); -- if (family == 15 && model == 6) -- { -- /* The level 3 cache is encoded for this model like -- the level 2 cache is for other models. Pretend -- the caller asked for the level 2 cache. */ -- name = (_SC_LEVEL2_CACHE_SIZE -- + (name - _SC_LEVEL3_CACHE_SIZE)); -- folded_name = _SC_LEVEL3_CACHE_SIZE; -- } -- } -- -- struct intel_02_cache_info *found; -- struct intel_02_cache_info search; -- -- search.idx = byte; -- found = bsearch (&search, intel_02_known, nintel_02_known, -- sizeof (intel_02_known[0]), intel_02_known_compare); -- if (found != NULL) -- { -- if (found->name == folded_name) -- { -- unsigned int offset = name - folded_name; -- -- if (offset == 0) -- /* Cache size. */ -- return found->size; -- if (offset == 1) -- return found->assoc; -- -- assert (offset == 2); -- return found->linesize; -- } -- -- if (found->name == _SC_LEVEL2_CACHE_SIZE) -- *has_level_2 = true; -- } -- } -- -- /* Next byte for the next round. */ -- value >>= 8; -- } -- -- /* Nothing found. */ -- return 0; --} -- -- --static long int __attribute__ ((noinline)) --handle_intel (int name, unsigned int maxidx) --{ -- assert (maxidx >= 2); -- -- /* OK, we can use the CPUID instruction to get all info about the -- caches. */ -- unsigned int cnt = 0; -- unsigned int max = 1; -- long int result = 0; -- bool no_level_2_or_3 = false; -- bool has_level_2 = false; -- while (cnt++ < max) -- { -- unsigned int eax; -- unsigned int ebx; -- unsigned int ecx; -- unsigned int edx; -- asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" -- : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx) -- : "0" (2)); -- -- /* The low byte of EAX in the first round contain the number of -- rounds we have to make. At least one, the one we are already -- doing. */ -- if (cnt == 1) -- { -- max = eax & 0xff; -- eax &= 0xffffff00; -- } -- -- /* Process the individual registers' value. */ -- result = intel_check_word (name, eax, &has_level_2, &no_level_2_or_3); -- if (result != 0) -- return result; -- -- result = intel_check_word (name, ebx, &has_level_2, &no_level_2_or_3); -- if (result != 0) -- return result; -- -- result = intel_check_word (name, ecx, &has_level_2, &no_level_2_or_3); -- if (result != 0) -- return result; -- -- result = intel_check_word (name, edx, &has_level_2, &no_level_2_or_3); -- if (result != 0) -- return result; -- } -- -- if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE -- && no_level_2_or_3) -- return -1; -- -- return 0; --} -- -- --static long int __attribute__ ((noinline)) --handle_amd (int name) --{ -- unsigned int eax; -- unsigned int ebx; -- unsigned int ecx; -- unsigned int edx; -- asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" -- : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx) -- : "0" (0x80000000)); -- -- if (name >= _SC_LEVEL3_CACHE_SIZE) -- return 0; -- -- unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE); -- if (eax < fn) -- return 0; -- -- asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" -- : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx) -- : "0" (fn)); -- -- if (name < _SC_LEVEL1_DCACHE_SIZE) -- { -- name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE; -- ecx = edx; -- } -- -- switch (name) -- { -- case _SC_LEVEL1_DCACHE_SIZE: -- return (ecx >> 14) & 0x3fc00; -- case _SC_LEVEL1_DCACHE_ASSOC: -- ecx >>= 16; -- if ((ecx & 0xff) == 0xff) -- /* Fully associative. */ -- return (ecx << 2) & 0x3fc00; -- return ecx & 0xff; -- case _SC_LEVEL1_DCACHE_LINESIZE: -- return ecx & 0xff; -- case _SC_LEVEL2_CACHE_SIZE: -- return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00; -- case _SC_LEVEL2_CACHE_ASSOC: -- ecx >>= 12; -- switch (ecx & 0xf) -- { -- case 0: -- case 1: -- case 2: -- case 4: -- return ecx & 0xf; -- case 6: -- return 8; -- case 8: -- return 16; -- case 0xf: -- return (ecx << 6) & 0x3fffc00; -- default: -- return 0; -- } -- case _SC_LEVEL2_CACHE_LINESIZE: -- return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff; -- default: -- assert (! "cannot happen"); -- } -- return -1; --} -+extern long int __cache_sysconf (int) attribute_hidden; - - - /* Get the value of the system variable NAME. */ - long int - __sysconf (int name) - { -- /* We only handle the cache information here (for now). */ -- if (name < _SC_LEVEL1_ICACHE_SIZE || name > _SC_LEVEL4_CACHE_LINESIZE) -- return linux_sysconf (name); -- -- /* Find out what brand of processor. */ -- unsigned int eax; -- unsigned int ebx; -- unsigned int ecx; -- unsigned int edx; -- asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" -- : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx) -- : "0" (0)); -- -- /* This spells out "GenuineIntel". */ -- if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) -- return handle_intel (name, eax); -- -- /* This spells out "AuthenticAMD". */ -- if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) -- return handle_amd (name); -- -- // XXX Fill in more vendors. -+ if (name >= _SC_LEVEL1_ICACHE_SIZE && name <= _SC_LEVEL4_CACHE_LINESIZE) -+ return __cache_sysconf (name); - -- /* CPU not known, we have no information. */ -- return 0; -+ return linux_sysconf (name); - } - - /* Now the generic Linux version. */ |