mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-11-01 09:13:37 +00:00 
			
		
		
		
	Fixed multiple spelling errors. Acked-by: Randy Dunlap <rdunlap@infradead.org> Signed-off-by: Carlos E. Garcia <carlos@cgarcia.org> Signed-off-by: Jiri Kosina <jkosina@suse.cz>
		
			
				
	
	
		
			392 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
			
		
		
	
	
			392 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
Red-black Trees (rbtree) in Linux
 | 
						|
January 18, 2007
 | 
						|
Rob Landley <rob@landley.net>
 | 
						|
=============================
 | 
						|
 | 
						|
What are red-black trees, and what are they for?
 | 
						|
------------------------------------------------
 | 
						|
 | 
						|
Red-black trees are a type of self-balancing binary search tree, used for
 | 
						|
storing sortable key/value data pairs.  This differs from radix trees (which
 | 
						|
are used to efficiently store sparse arrays and thus use long integer indexes
 | 
						|
to insert/access/delete nodes) and hash tables (which are not kept sorted to
 | 
						|
be easily traversed in order, and must be tuned for a specific size and
 | 
						|
hash function where rbtrees scale gracefully storing arbitrary keys).
 | 
						|
 | 
						|
Red-black trees are similar to AVL trees, but provide faster real-time bounded
 | 
						|
worst case performance for insertion and deletion (at most two rotations and
 | 
						|
three rotations, respectively, to balance the tree), with slightly slower
 | 
						|
(but still O(log n)) lookup time.
 | 
						|
 | 
						|
To quote Linux Weekly News:
 | 
						|
 | 
						|
    There are a number of red-black trees in use in the kernel.
 | 
						|
    The deadline and CFQ I/O schedulers employ rbtrees to
 | 
						|
    track requests; the packet CD/DVD driver does the same.
 | 
						|
    The high-resolution timer code uses an rbtree to organize outstanding
 | 
						|
    timer requests.  The ext3 filesystem tracks directory entries in a
 | 
						|
    red-black tree.  Virtual memory areas (VMAs) are tracked with red-black
 | 
						|
    trees, as are epoll file descriptors, cryptographic keys, and network
 | 
						|
    packets in the "hierarchical token bucket" scheduler.
 | 
						|
 | 
						|
This document covers use of the Linux rbtree implementation.  For more
 | 
						|
information on the nature and implementation of Red Black Trees,  see:
 | 
						|
 | 
						|
  Linux Weekly News article on red-black trees
 | 
						|
    http://lwn.net/Articles/184495/
 | 
						|
 | 
						|
  Wikipedia entry on red-black trees
 | 
						|
    http://en.wikipedia.org/wiki/Red-black_tree
 | 
						|
 | 
						|
Linux implementation of red-black trees
 | 
						|
---------------------------------------
 | 
						|
 | 
						|
Linux's rbtree implementation lives in the file "lib/rbtree.c".  To use it,
 | 
						|
"#include <linux/rbtree.h>".
 | 
						|
 | 
						|
The Linux rbtree implementation is optimized for speed, and thus has one
 | 
						|
less layer of indirection (and better cache locality) than more traditional
 | 
						|
tree implementations.  Instead of using pointers to separate rb_node and data
 | 
						|
structures, each instance of struct rb_node is embedded in the data structure
 | 
						|
it organizes.  And instead of using a comparison callback function pointer,
 | 
						|
users are expected to write their own tree search and insert functions
 | 
						|
which call the provided rbtree functions.  Locking is also left up to the
 | 
						|
user of the rbtree code.
 | 
						|
 | 
						|
Creating a new rbtree
 | 
						|
---------------------
 | 
						|
 | 
						|
Data nodes in an rbtree tree are structures containing a struct rb_node member:
 | 
						|
 | 
						|
  struct mytype {
 | 
						|
  	struct rb_node node;
 | 
						|
  	char *keystring;
 | 
						|
  };
 | 
						|
 | 
						|
When dealing with a pointer to the embedded struct rb_node, the containing data
 | 
						|
structure may be accessed with the standard container_of() macro.  In addition,
 | 
						|
individual members may be accessed directly via rb_entry(node, type, member).
 | 
						|
 | 
						|
At the root of each rbtree is an rb_root structure, which is initialized to be
 | 
						|
empty via:
 | 
						|
 | 
						|
  struct rb_root mytree = RB_ROOT;
 | 
						|
 | 
						|
Searching for a value in an rbtree
 | 
						|
----------------------------------
 | 
						|
 | 
						|
Writing a search function for your tree is fairly straightforward: start at the
 | 
						|
root, compare each value, and follow the left or right branch as necessary.
 | 
						|
 | 
						|
Example:
 | 
						|
 | 
						|
  struct mytype *my_search(struct rb_root *root, char *string)
 | 
						|
  {
 | 
						|
  	struct rb_node *node = root->rb_node;
 | 
						|
 | 
						|
  	while (node) {
 | 
						|
  		struct mytype *data = container_of(node, struct mytype, node);
 | 
						|
		int result;
 | 
						|
 | 
						|
		result = strcmp(string, data->keystring);
 | 
						|
 | 
						|
		if (result < 0)
 | 
						|
  			node = node->rb_left;
 | 
						|
		else if (result > 0)
 | 
						|
  			node = node->rb_right;
 | 
						|
		else
 | 
						|
  			return data;
 | 
						|
	}
 | 
						|
	return NULL;
 | 
						|
  }
 | 
						|
 | 
						|
Inserting data into an rbtree
 | 
						|
-----------------------------
 | 
						|
 | 
						|
Inserting data in the tree involves first searching for the place to insert the
 | 
						|
new node, then inserting the node and rebalancing ("recoloring") the tree.
 | 
						|
 | 
						|
The search for insertion differs from the previous search by finding the
 | 
						|
location of the pointer on which to graft the new node.  The new node also
 | 
						|
needs a link to its parent node for rebalancing purposes.
 | 
						|
 | 
						|
Example:
 | 
						|
 | 
						|
  int my_insert(struct rb_root *root, struct mytype *data)
 | 
						|
  {
 | 
						|
  	struct rb_node **new = &(root->rb_node), *parent = NULL;
 | 
						|
 | 
						|
  	/* Figure out where to put new node */
 | 
						|
  	while (*new) {
 | 
						|
  		struct mytype *this = container_of(*new, struct mytype, node);
 | 
						|
  		int result = strcmp(data->keystring, this->keystring);
 | 
						|
 | 
						|
		parent = *new;
 | 
						|
  		if (result < 0)
 | 
						|
  			new = &((*new)->rb_left);
 | 
						|
  		else if (result > 0)
 | 
						|
  			new = &((*new)->rb_right);
 | 
						|
  		else
 | 
						|
  			return FALSE;
 | 
						|
  	}
 | 
						|
 | 
						|
  	/* Add new node and rebalance tree. */
 | 
						|
  	rb_link_node(&data->node, parent, new);
 | 
						|
  	rb_insert_color(&data->node, root);
 | 
						|
 | 
						|
	return TRUE;
 | 
						|
  }
 | 
						|
 | 
						|
Removing or replacing existing data in an rbtree
 | 
						|
------------------------------------------------
 | 
						|
 | 
						|
To remove an existing node from a tree, call:
 | 
						|
 | 
						|
  void rb_erase(struct rb_node *victim, struct rb_root *tree);
 | 
						|
 | 
						|
Example:
 | 
						|
 | 
						|
  struct mytype *data = mysearch(&mytree, "walrus");
 | 
						|
 | 
						|
  if (data) {
 | 
						|
  	rb_erase(&data->node, &mytree);
 | 
						|
  	myfree(data);
 | 
						|
  }
 | 
						|
 | 
						|
To replace an existing node in a tree with a new one with the same key, call:
 | 
						|
 | 
						|
  void rb_replace_node(struct rb_node *old, struct rb_node *new,
 | 
						|
  			struct rb_root *tree);
 | 
						|
 | 
						|
Replacing a node this way does not re-sort the tree: If the new node doesn't
 | 
						|
have the same key as the old node, the rbtree will probably become corrupted.
 | 
						|
 | 
						|
Iterating through the elements stored in an rbtree (in sort order)
 | 
						|
------------------------------------------------------------------
 | 
						|
 | 
						|
Four functions are provided for iterating through an rbtree's contents in
 | 
						|
sorted order.  These work on arbitrary trees, and should not need to be
 | 
						|
modified or wrapped (except for locking purposes):
 | 
						|
 | 
						|
  struct rb_node *rb_first(struct rb_root *tree);
 | 
						|
  struct rb_node *rb_last(struct rb_root *tree);
 | 
						|
  struct rb_node *rb_next(struct rb_node *node);
 | 
						|
  struct rb_node *rb_prev(struct rb_node *node);
 | 
						|
 | 
						|
To start iterating, call rb_first() or rb_last() with a pointer to the root
 | 
						|
of the tree, which will return a pointer to the node structure contained in
 | 
						|
the first or last element in the tree.  To continue, fetch the next or previous
 | 
						|
node by calling rb_next() or rb_prev() on the current node.  This will return
 | 
						|
NULL when there are no more nodes left.
 | 
						|
 | 
						|
The iterator functions return a pointer to the embedded struct rb_node, from
 | 
						|
which the containing data structure may be accessed with the container_of()
 | 
						|
macro, and individual members may be accessed directly via
 | 
						|
rb_entry(node, type, member).
 | 
						|
 | 
						|
Example:
 | 
						|
 | 
						|
  struct rb_node *node;
 | 
						|
  for (node = rb_first(&mytree); node; node = rb_next(node))
 | 
						|
	printk("key=%s\n", rb_entry(node, struct mytype, node)->keystring);
 | 
						|
 | 
						|
Support for Augmented rbtrees
 | 
						|
-----------------------------
 | 
						|
 | 
						|
Augmented rbtree is an rbtree with "some" additional data stored in
 | 
						|
each node, where the additional data for node N must be a function of
 | 
						|
the contents of all nodes in the subtree rooted at N. This data can
 | 
						|
be used to augment some new functionality to rbtree. Augmented rbtree
 | 
						|
is an optional feature built on top of basic rbtree infrastructure.
 | 
						|
An rbtree user who wants this feature will have to call the augmentation
 | 
						|
functions with the user provided augmentation callback when inserting
 | 
						|
and erasing nodes.
 | 
						|
 | 
						|
C files implementing augmented rbtree manipulation must include
 | 
						|
<linux/rbtree_augmented.h> instead of <linus/rbtree.h>. Note that
 | 
						|
linux/rbtree_augmented.h exposes some rbtree implementations details
 | 
						|
you are not expected to rely on; please stick to the documented APIs
 | 
						|
there and do not include <linux/rbtree_augmented.h> from header files
 | 
						|
either so as to minimize chances of your users accidentally relying on
 | 
						|
such implementation details.
 | 
						|
 | 
						|
On insertion, the user must update the augmented information on the path
 | 
						|
leading to the inserted node, then call rb_link_node() as usual and
 | 
						|
rb_augment_inserted() instead of the usual rb_insert_color() call.
 | 
						|
If rb_augment_inserted() rebalances the rbtree, it will callback into
 | 
						|
a user provided function to update the augmented information on the
 | 
						|
affected subtrees.
 | 
						|
 | 
						|
When erasing a node, the user must call rb_erase_augmented() instead of
 | 
						|
rb_erase(). rb_erase_augmented() calls back into user provided functions
 | 
						|
to updated the augmented information on affected subtrees.
 | 
						|
 | 
						|
In both cases, the callbacks are provided through struct rb_augment_callbacks.
 | 
						|
3 callbacks must be defined:
 | 
						|
 | 
						|
- A propagation callback, which updates the augmented value for a given
 | 
						|
  node and its ancestors, up to a given stop point (or NULL to update
 | 
						|
  all the way to the root).
 | 
						|
 | 
						|
- A copy callback, which copies the augmented value for a given subtree
 | 
						|
  to a newly assigned subtree root.
 | 
						|
 | 
						|
- A tree rotation callback, which copies the augmented value for a given
 | 
						|
  subtree to a newly assigned subtree root AND recomputes the augmented
 | 
						|
  information for the former subtree root.
 | 
						|
 | 
						|
The compiled code for rb_erase_augmented() may inline the propagation and
 | 
						|
copy callbacks, which results in a large function, so each augmented rbtree
 | 
						|
user should have a single rb_erase_augmented() call site in order to limit
 | 
						|
compiled code size.
 | 
						|
 | 
						|
 | 
						|
Sample usage:
 | 
						|
 | 
						|
Interval tree is an example of augmented rb tree. Reference -
 | 
						|
"Introduction to Algorithms" by Cormen, Leiserson, Rivest and Stein.
 | 
						|
More details about interval trees:
 | 
						|
 | 
						|
Classical rbtree has a single key and it cannot be directly used to store
 | 
						|
interval ranges like [lo:hi] and do a quick lookup for any overlap with a new
 | 
						|
lo:hi or to find whether there is an exact match for a new lo:hi.
 | 
						|
 | 
						|
However, rbtree can be augmented to store such interval ranges in a structured
 | 
						|
way making it possible to do efficient lookup and exact match.
 | 
						|
 | 
						|
This "extra information" stored in each node is the maximum hi
 | 
						|
(max_hi) value among all the nodes that are its descendants. This
 | 
						|
information can be maintained at each node just be looking at the node
 | 
						|
and its immediate children. And this will be used in O(log n) lookup
 | 
						|
for lowest match (lowest start address among all possible matches)
 | 
						|
with something like:
 | 
						|
 | 
						|
struct interval_tree_node *
 | 
						|
interval_tree_first_match(struct rb_root *root,
 | 
						|
			  unsigned long start, unsigned long last)
 | 
						|
{
 | 
						|
	struct interval_tree_node *node;
 | 
						|
 | 
						|
	if (!root->rb_node)
 | 
						|
		return NULL;
 | 
						|
	node = rb_entry(root->rb_node, struct interval_tree_node, rb);
 | 
						|
 | 
						|
	while (true) {
 | 
						|
		if (node->rb.rb_left) {
 | 
						|
			struct interval_tree_node *left =
 | 
						|
				rb_entry(node->rb.rb_left,
 | 
						|
					 struct interval_tree_node, rb);
 | 
						|
			if (left->__subtree_last >= start) {
 | 
						|
				/*
 | 
						|
				 * Some nodes in left subtree satisfy Cond2.
 | 
						|
				 * Iterate to find the leftmost such node N.
 | 
						|
				 * If it also satisfies Cond1, that's the match
 | 
						|
				 * we are looking for. Otherwise, there is no
 | 
						|
				 * matching interval as nodes to the right of N
 | 
						|
				 * can't satisfy Cond1 either.
 | 
						|
				 */
 | 
						|
				node = left;
 | 
						|
				continue;
 | 
						|
			}
 | 
						|
		}
 | 
						|
		if (node->start <= last) {		/* Cond1 */
 | 
						|
			if (node->last >= start)	/* Cond2 */
 | 
						|
				return node;	/* node is leftmost match */
 | 
						|
			if (node->rb.rb_right) {
 | 
						|
				node = rb_entry(node->rb.rb_right,
 | 
						|
					struct interval_tree_node, rb);
 | 
						|
				if (node->__subtree_last >= start)
 | 
						|
					continue;
 | 
						|
			}
 | 
						|
		}
 | 
						|
		return NULL;	/* No match */
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
Insertion/removal are defined using the following augmented callbacks:
 | 
						|
 | 
						|
static inline unsigned long
 | 
						|
compute_subtree_last(struct interval_tree_node *node)
 | 
						|
{
 | 
						|
	unsigned long max = node->last, subtree_last;
 | 
						|
	if (node->rb.rb_left) {
 | 
						|
		subtree_last = rb_entry(node->rb.rb_left,
 | 
						|
			struct interval_tree_node, rb)->__subtree_last;
 | 
						|
		if (max < subtree_last)
 | 
						|
			max = subtree_last;
 | 
						|
	}
 | 
						|
	if (node->rb.rb_right) {
 | 
						|
		subtree_last = rb_entry(node->rb.rb_right,
 | 
						|
			struct interval_tree_node, rb)->__subtree_last;
 | 
						|
		if (max < subtree_last)
 | 
						|
			max = subtree_last;
 | 
						|
	}
 | 
						|
	return max;
 | 
						|
}
 | 
						|
 | 
						|
static void augment_propagate(struct rb_node *rb, struct rb_node *stop)
 | 
						|
{
 | 
						|
	while (rb != stop) {
 | 
						|
		struct interval_tree_node *node =
 | 
						|
			rb_entry(rb, struct interval_tree_node, rb);
 | 
						|
		unsigned long subtree_last = compute_subtree_last(node);
 | 
						|
		if (node->__subtree_last == subtree_last)
 | 
						|
			break;
 | 
						|
		node->__subtree_last = subtree_last;
 | 
						|
		rb = rb_parent(&node->rb);
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new)
 | 
						|
{
 | 
						|
	struct interval_tree_node *old =
 | 
						|
		rb_entry(rb_old, struct interval_tree_node, rb);
 | 
						|
	struct interval_tree_node *new =
 | 
						|
		rb_entry(rb_new, struct interval_tree_node, rb);
 | 
						|
 | 
						|
	new->__subtree_last = old->__subtree_last;
 | 
						|
}
 | 
						|
 | 
						|
static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
 | 
						|
{
 | 
						|
	struct interval_tree_node *old =
 | 
						|
		rb_entry(rb_old, struct interval_tree_node, rb);
 | 
						|
	struct interval_tree_node *new =
 | 
						|
		rb_entry(rb_new, struct interval_tree_node, rb);
 | 
						|
 | 
						|
	new->__subtree_last = old->__subtree_last;
 | 
						|
	old->__subtree_last = compute_subtree_last(old);
 | 
						|
}
 | 
						|
 | 
						|
static const struct rb_augment_callbacks augment_callbacks = {
 | 
						|
	augment_propagate, augment_copy, augment_rotate
 | 
						|
};
 | 
						|
 | 
						|
void interval_tree_insert(struct interval_tree_node *node,
 | 
						|
			  struct rb_root *root)
 | 
						|
{
 | 
						|
	struct rb_node **link = &root->rb_node, *rb_parent = NULL;
 | 
						|
	unsigned long start = node->start, last = node->last;
 | 
						|
	struct interval_tree_node *parent;
 | 
						|
 | 
						|
	while (*link) {
 | 
						|
		rb_parent = *link;
 | 
						|
		parent = rb_entry(rb_parent, struct interval_tree_node, rb);
 | 
						|
		if (parent->__subtree_last < last)
 | 
						|
			parent->__subtree_last = last;
 | 
						|
		if (start < parent->start)
 | 
						|
			link = &parent->rb.rb_left;
 | 
						|
		else
 | 
						|
			link = &parent->rb.rb_right;
 | 
						|
	}
 | 
						|
 | 
						|
	node->__subtree_last = last;
 | 
						|
	rb_link_node(&node->rb, rb_parent, link);
 | 
						|
	rb_insert_augmented(&node->rb, root, &augment_callbacks);
 | 
						|
}
 | 
						|
 | 
						|
void interval_tree_remove(struct interval_tree_node *node,
 | 
						|
			  struct rb_root *root)
 | 
						|
{
 | 
						|
	rb_erase_augmented(&node->rb, root, &augment_callbacks);
 | 
						|
}
 |