diff --git a/manual/tunables.texi b/manual/tunables.texi
index 3dc6f9a..3e1e519 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -364,7 +364,11 @@ set shared cache size in bytes for use in memory and string routines.
 
 @deftp Tunable glibc.tune.x86_non_temporal_threshold
 The @code{glibc.tune.x86_non_temporal_threshold} tunable allows the user
-to set threshold in bytes for non temporal store.
+to set threshold in bytes for non temporal store. Non temporal stores
+give a hint to the hardware to move data directly to memory without
+displacing other data from the cache. This tunable is used by some
+platforms to determine when to use non temporal stores in operations
+like memmove and memcpy.
 
 This tunable is specific to i386 and x86-64.
 @end deftp
diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
index b9444dd..42b468d 100644
--- a/sysdeps/x86/cacheinfo.c
+++ b/sysdeps/x86/cacheinfo.c
@@ -778,14 +778,20 @@ intel_bug_no_cache_info:
       __x86_shared_cache_size = shared;
     }
 
-  /* The large memcpy micro benchmark in glibc shows that 6 times of
-     shared cache size is the approximate value above which non-temporal
-     store becomes faster on a 8-core processor.  This is the 3/4 of the
-     total shared cache size.  */
+  /* The default setting for the non_temporal threshold is 3/4 of one
+     thread's share of the chip's cache. For most Intel and AMD processors
+     with an initial release date between 2017 and 2020, a thread's typical
+     share of the cache is from 500 KBytes to 2 MBytes. Using the 3/4
+     threshold leaves 125 KBytes to 500 KBytes of the thread's data
+     in cache after a maximum temporal copy, which will maintain
+     in cache a reasonable portion of the thread's stack and other
+     active data. If the threshold is set higher than one thread's
+     share of the cache, it has a substantial risk of negatively
+     impacting the performance of other threads running on the chip. */
   __x86_shared_non_temporal_threshold
     = (cpu_features->non_temporal_threshold != 0
        ? cpu_features->non_temporal_threshold
-       : __x86_shared_cache_size * threads * 3 / 4);
+       : __x86_shared_cache_size * 3 / 4);
 }
 
 #endif