mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-18 22:14:16 +00:00 
			
		
		
		
	 166a0f780a
			
		
	
	
		166a0f780a
		
	
	
	
	
		
			
			There is a slightly faster way (in terms of the number of instructions
being used) to calculate the position of a middle element, preserving
integer overflow safeness.
./scripts/bloat-o-meter lib/bsearch.o.old lib/bsearch.o.new
add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-24 (-24)
function                                     old     new   delta
bsearch                                      122      98     -24
TEST
INT array of size 100001, elements [0..100000]. gcc 7.1, Os, x86_64.
a) bsearch() of existing key "100001 - 2":
BASE
====
$ perf stat ./a.out
 Performance counter stats for './a.out':
        619.445196      task-clock:u (msec)       #    0.999 CPUs utilized
                 0      context-switches:u        #    0.000 K/sec
                 0      cpu-migrations:u          #    0.000 K/sec
               133      page-faults:u             #    0.215 K/sec
     1,949,517,279      cycles:u                  #    3.147 GHz                      (83.06%)
       181,017,938      stalled-cycles-frontend:u #    9.29% frontend cycles idle     (83.05%)
        82,959,265      stalled-cycles-backend:u  #    4.26% backend cycles idle      (67.02%)
     4,355,706,383      instructions:u            #    2.23  insn per cycle
                                                  #    0.04  stalled cycles per insn  (83.54%)
     1,051,539,242      branches:u                # 1697.550 M/sec                    (83.54%)
        15,263,381      branch-misses:u           #    1.45% of all branches          (83.43%)
       0.620082548 seconds time elapsed
PATCHED
=======
$ perf stat ./a.out
 Performance counter stats for './a.out':
        475.097316      task-clock:u (msec)       #    0.999 CPUs utilized
                 0      context-switches:u        #    0.000 K/sec
                 0      cpu-migrations:u          #    0.000 K/sec
               135      page-faults:u             #    0.284 K/sec
     1,487,467,717      cycles:u                  #    3.131 GHz                      (82.95%)
       186,537,162      stalled-cycles-frontend:u #   12.54% frontend cycles idle     (82.93%)
        28,797,869      stalled-cycles-backend:u  #    1.94% backend cycles idle      (67.10%)
     3,807,564,203      instructions:u            #    2.56  insn per cycle
                                                  #    0.05  stalled cycles per insn  (83.57%)
     1,049,344,291      branches:u                # 2208.693 M/sec                    (83.60%)
             5,485      branch-misses:u           #    0.00% of all branches          (83.58%)
       0.475760235 seconds time elapsed
b) bsearch() of un-existing key "100001 + 2":
BASE
====
$ perf stat ./a.out
 Performance counter stats for './a.out':
        499.244480      task-clock:u (msec)       #    0.999 CPUs utilized
                 0      context-switches:u        #    0.000 K/sec
                 0      cpu-migrations:u          #    0.000 K/sec
               132      page-faults:u             #    0.264 K/sec
     1,571,194,855      cycles:u                  #    3.147 GHz                      (83.18%)
        13,450,980      stalled-cycles-frontend:u #    0.86% frontend cycles idle     (83.18%)
        21,256,072      stalled-cycles-backend:u  #    1.35% backend cycles idle      (66.78%)
     4,171,197,909      instructions:u            #    2.65  insn per cycle
                                                  #    0.01  stalled cycles per insn  (83.68%)
     1,009,175,281      branches:u                # 2021.405 M/sec                    (83.79%)
             3,122      branch-misses:u           #    0.00% of all branches          (83.37%)
       0.499871144 seconds time elapsed
PATCHED
=======
$ perf stat ./a.out
 Performance counter stats for './a.out':
        399.023499      task-clock:u (msec)       #    0.998 CPUs utilized
                 0      context-switches:u        #    0.000 K/sec
                 0      cpu-migrations:u          #    0.000 K/sec
               134      page-faults:u             #    0.336 K/sec
     1,245,793,991      cycles:u                  #    3.122 GHz                      (83.39%)
        11,529,273      stalled-cycles-frontend:u #    0.93% frontend cycles idle     (83.46%)
        12,116,311      stalled-cycles-backend:u  #    0.97% backend cycles idle      (66.92%)
     3,679,710,005      instructions:u            #    2.95  insn per cycle
                                                  #    0.00  stalled cycles per insn  (83.47%)
     1,009,792,625      branches:u                # 2530.660 M/sec                    (83.46%)
             2,590      branch-misses:u           #    0.00% of all branches          (83.12%)
       0.399733539 seconds time elapsed
Link: http://lkml.kernel.org/r/20170607150457.5905-1-sergey.senozhatsky@gmail.com
Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
		
	
			
		
			
				
	
	
		
			55 lines
		
	
	
	
		
			1.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			55 lines
		
	
	
	
		
			1.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * A generic implementation of binary search for the Linux kernel
 | |
|  *
 | |
|  * Copyright (C) 2008-2009 Ksplice, Inc.
 | |
|  * Author: Tim Abbott <tabbott@ksplice.com>
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or
 | |
|  * modify it under the terms of the GNU General Public License as
 | |
|  * published by the Free Software Foundation; version 2.
 | |
|  */
 | |
| 
 | |
| #include <linux/export.h>
 | |
| #include <linux/bsearch.h>
 | |
| 
 | |
| /*
 | |
|  * bsearch - binary search an array of elements
 | |
|  * @key: pointer to item being searched for
 | |
|  * @base: pointer to first element to search
 | |
|  * @num: number of elements
 | |
|  * @size: size of each element
 | |
|  * @cmp: pointer to comparison function
 | |
|  *
 | |
|  * This function does a binary search on the given array.  The
 | |
|  * contents of the array should already be in ascending sorted order
 | |
|  * under the provided comparison function.
 | |
|  *
 | |
|  * Note that the key need not have the same type as the elements in
 | |
|  * the array, e.g. key could be a string and the comparison function
 | |
|  * could compare the string with the struct's name field.  However, if
 | |
|  * the key and elements in the array are of the same type, you can use
 | |
|  * the same comparison function for both sort() and bsearch().
 | |
|  */
 | |
| void *bsearch(const void *key, const void *base, size_t num, size_t size,
 | |
| 	      int (*cmp)(const void *key, const void *elt))
 | |
| {
 | |
| 	const char *pivot;
 | |
| 	int result;
 | |
| 
 | |
| 	while (num > 0) {
 | |
| 		pivot = base + (num >> 1) * size;
 | |
| 		result = cmp(key, pivot);
 | |
| 
 | |
| 		if (result == 0)
 | |
| 			return (void *)pivot;
 | |
| 
 | |
| 		if (result > 0) {
 | |
| 			base = pivot + size;
 | |
| 			num--;
 | |
| 		}
 | |
| 		num >>= 1;
 | |
| 	}
 | |
| 
 | |
| 	return NULL;
 | |
| }
 | |
| EXPORT_SYMBOL(bsearch);
 |