mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (synced 2025-08-05 16:54:27 +00:00)

Implement a simple NUMA-aware spinlock for testing and howto purposes.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
262 lines · 4.8 KiB · C
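
The lock below packs a 32-bit futex word (val, carrying the N_LOCK and N_WAITERS bits) and a 32-bit node id (node) into one u64, so both are updated with a single 64-bit cmpxchg; contended lockers sleep in futex2_wait() and the unlocker wakes one of them with futex2_wake(). As a rough illustration of the intended calling pattern, here is a hypothetical snippet that is not part of the file: demo_lock, demo_counter and demo_thread are made-up names, and it assumes it is compiled together with the definitions from the listing below.

	static struct futex_numa_32 demo_lock;	/* all-zero == unlocked, no node recorded */
	static int demo_counter;

	static void *demo_thread(void *arg)
	{
		(void)arg;
		for (int i = 0; i < 1000; i++) {
			futex_numa_32_lock(&demo_lock);
			demo_counter++;		/* serialised by demo_lock */
			futex_numa_32_unlock(&demo_lock);
		}
		return NULL;
	}
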
// SPDX-License-Identifier: GPL-2.0

#include <pthread.h>
#include <sys/shm.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <stdbool.h>
#include <time.h>
#include <assert.h>
#include "logging.h"
#include "futextest.h"
#include "futex2test.h"

typedef u_int32_t u32;
typedef int32_t s32;
typedef u_int64_t u64;

static unsigned int fflags = (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE);
static int fnode = FUTEX_NO_NODE;

/* fairly stupid test-and-set lock with a waiter flag */

#define N_LOCK		0x0000001
#define N_WAITERS	0x0001000

struct futex_numa_32 {
	union {
		u64 full;
		struct {
			u32 val;
			u32 node;
		};
	};
};

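/*
 * Acquire: set N_LOCK in ->val, with the first locker also (re)setting
 * ->node to fnode.  If the lock was already held, set N_WAITERS as well
 * and sleep in futex2_wait() until woken, then retry.  ->val and ->node
 * are written together with one 64-bit cmpxchg so they stay consistent.
 */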
void futex_numa_32_lock(struct futex_numa_32 *lock)
{
	for (;;) {
		struct futex_numa_32 new, old = {
			.full = __atomic_load_n(&lock->full, __ATOMIC_RELAXED),
		};

		for (;;) {
			new = old;
			if (old.val == 0) {
				/* no waiter, no lock -> first lock, set no-node */
				new.node = fnode;
			}
			if (old.val & N_LOCK) {
				/* contention, set waiter */
				new.val |= N_WAITERS;
			}
			new.val |= N_LOCK;

			/* nothing changed, ready to block */
			if (old.full == new.full)
				break;

			/*
			 * Use u64 cmpxchg to set the futex value and node in a
			 * consistent manner.
			 */
			if (__atomic_compare_exchange_n(&lock->full,
							&old.full, new.full,
							/* .weak */ false,
							__ATOMIC_ACQUIRE,
							__ATOMIC_RELAXED)) {

				/* if we just set N_LOCK, we own it */
				if (!(old.val & N_LOCK))
					return;

				/* go block */
				break;
			}
		}

		futex2_wait(lock, new.val, fflags, NULL, 0);
	}
}

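/*
 * Release: drop N_LOCK with a release-ordered subtraction.  If N_WAITERS
 * was set, wake one waiter with futex2_wake(); if nobody was woken, try
 * to clear the now-stale N_WAITERS bit with a cmpxchg.
 */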
void futex_numa_32_unlock(struct futex_numa_32 *lock)
{
	u32 val = __atomic_sub_fetch(&lock->val, N_LOCK, __ATOMIC_RELEASE);
	assert((s32)val >= 0);
	if (val & N_WAITERS) {
		int woken = futex2_wake(lock, 1, fflags);
		assert(val == N_WAITERS);
		if (!woken) {
			__atomic_compare_exchange_n(&lock->val, &val, 0U,
						    false, __ATOMIC_RELAXED,
						    __ATOMIC_RELAXED);
		}
	}
}

static long nanos = 50000;

struct thread_args {
	pthread_t tid;
	volatile int *done;
	struct futex_numa_32 *lock;
	int val;
	int *val1, *val2;
	int node;
};

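/*
 * Worker: take the lock, bump both shared counters with a nanosleep in
 * between to widen the window in which a broken lock would trip the
 * assert(*val1 == *val2), and report whenever the node recorded in the
 * lock changes.
 */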
static void *threadfn(void *_arg)
{
	struct thread_args *args = _arg;
	struct timespec ts = {
		.tv_nsec = nanos,
	};
	int node;

	while (!*args->done) {

		futex_numa_32_lock(args->lock);
		args->val++;

		assert(*args->val1 == *args->val2);
		(*args->val1)++;
		nanosleep(&ts, NULL);
		(*args->val2)++;

		node = args->lock->node;
		futex_numa_32_unlock(args->lock);

		if (node != args->node) {
			args->node = node;
			printf("node: %d\n", node);
		}

		nanosleep(&ts, NULL);
	}

	return NULL;
}

static void *contendfn(void *_arg)
{
	struct thread_args *args = _arg;

	while (!*args->done) {
		/*
		 * futex2_wait() will take hb-lock, verify *var == val and
		 * queue/abort. By knowingly setting val 'wrong' this will
		 * abort and thereby generate hb-lock contention.
		 */
		futex2_wait(&args->lock->val, ~0U, fflags, NULL, 0);
		args->val++;
	}

	return NULL;
}

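/*
 * State shared by all threads: the stop flag, the lock under test and
 * the two counters that must only ever be observed equal under the lock.
 */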
static volatile int done = 0;
static struct futex_numa_32 lock = { .val = 0, };
static int val1, val2;

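/*
 * Options: -c N contender threads (default 0), -t N worker threads
 * (default 2), -s S run time in seconds (default 10), -n NS nanosleep
 * period in nanoseconds (default 50000), -N[node] enable FUTEX2_NUMA,
 * optionally giving the node id to use instead of FUTEX_NO_NODE.
 */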
int main(int argc, char *argv[])
{
	struct thread_args *tas[512], *cas[512];
	int c, t, threads = 2, contenders = 0;
	int sleeps = 10;
	int total = 0;

	while ((c = getopt(argc, argv, "c:t:s:n:N::")) != -1) {
		switch (c) {
		case 'c':
			contenders = atoi(optarg);
			break;
		case 't':
			threads = atoi(optarg);
			break;
		case 's':
			sleeps = atoi(optarg);
			break;
		case 'n':
			nanos = atoi(optarg);
			break;
		case 'N':
			fflags |= FUTEX2_NUMA;
			if (optarg)
				fnode = atoi(optarg);
			break;
		default:
			exit(1);
			break;
		}
	}

	for (t = 0; t < contenders; t++) {
		struct thread_args *args = calloc(1, sizeof(*args));
		if (!args) {
			perror("thread_args");
			exit(-1);
		}

		args->done = &done;
		args->lock = &lock;
		args->val1 = &val1;
		args->val2 = &val2;
		args->node = -1;

		if (pthread_create(&args->tid, NULL, contendfn, args)) {
			perror("pthread_create");
			exit(-1);
		}

		cas[t] = args;
	}

	for (t = 0; t < threads; t++) {
		struct thread_args *args = calloc(1, sizeof(*args));
		if (!args) {
			perror("thread_args");
			exit(-1);
		}

		args->done = &done;
		args->lock = &lock;
		args->val1 = &val1;
		args->val2 = &val2;
		args->node = -1;

		if (pthread_create(&args->tid, NULL, threadfn, args)) {
			perror("pthread_create");
			exit(-1);
		}

		tas[t] = args;
	}

	sleep(sleeps);

	done = true;

	for (t = 0; t < threads; t++) {
		struct thread_args *args = tas[t];

		pthread_join(args->tid, NULL);
		total += args->val;
		// printf("tval: %d\n", args->val);
	}
	printf("total: %d\n", total);

	if (contenders) {
		total = 0;
		for (t = 0; t < contenders; t++) {
			struct thread_args *args = cas[t];

			pthread_join(args->tid, NULL);
			total += args->val;
			// printf("tval: %d\n", args->val);
		}
		printf("contenders: %d\n", total);
	}

	return 0;
}