mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00

The current way that afs_server refs are accounted and cleaned up sometimes causes rmmod to hang when it is waiting for cell records to be removed. The problem is that the cell cleanup might occasionally happen before the server cleanup and then there's nothing that causes the cell to garbage-collect the remaining servers as they become inactive. Partially fix this by: (1) Give each afs_server record its own management timer rather than relying on the cell manager's central timer to drive each individual cell's maintenance work item to garbage collect servers. This timer is set when afs_unuse_server() reduces a server's activity count to zero and will schedule the server's destroyer work item upon firing. (2) Give each afs_server record its own destroyer work item that removes the record from the cell's database, shuts down the timer, cancels any pending work for itself, and sends an RPC to the server to cancel outstanding callbacks. This change, in combination with the timer, obviates the need to try and coordinate so closely between the cell record and a bunch of other server records to try and tear everything down in a coordinated fashion. With this, the cell record is pinned until the server RCU is complete and namespace/module removal will wait until all the cell records are removed. (3) Now that incoming calls are mapped to servers (and thus cells) using data attached to an rxrpc_peer, the UUID-to-server mapping tree is moved from the namespace to the cell (cell->fs_servers). This means there can no longer be duplicates therein - and that allows the mapping tree to be simpler as there doesn't need to be a chain of same-UUID servers that are in different cells. (4) The lock protecting the UUID mapping tree is switched to an rw_semaphore on the cell rather than a seqlock on the namespace as it's now only used during mounting in contexts in which we're allowed to sleep. 
(5) When it comes time for a cell that is being removed to purge its set of servers, it just needs to iterate over them and wake them up. Once a server becomes inactive, its destroyer work item will observe the state of the cell and immediately remove that record. (6) When a server record is removed, it is marked AFS_SERVER_FL_EXPIRED to prevent reattempts at removal. The record will be dispatched to RCU for destruction once its refcount reaches 0. (7) The AFS_SERVER_FL_UNCREATED/CREATING flags are used to synchronise simultaneous creation attempts. If one attempt fails, it will abandon the attempt and allow another to try again. Note that the record can't just be abandoned when dead as it's bound into a server list attached to a volume and only subject to replacement if the server list obtained for the volume from the VLDB changes. Signed-off-by: David Howells <dhowells@redhat.com> cc: Marc Dionne <marc.dionne@auristor.com> cc: linux-afs@lists.infradead.org cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20250224234154.2014840-15-dhowells@redhat.com/ # v1 Link: https://lore.kernel.org/r/20250310094206.801057-11-dhowells@redhat.com/ # v4
249 lines
6.2 KiB
C
249 lines
6.2 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/* AFS fileserver list management.
|
|
*
|
|
* Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
|
|
* Written by David Howells (dhowells@redhat.com)
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/slab.h>
|
|
#include "internal.h"
|
|
|
|
void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
|
|
{
|
|
int i;
|
|
|
|
if (slist && refcount_dec_and_test(&slist->usage)) {
|
|
for (i = 0; i < slist->nr_servers; i++)
|
|
afs_unuse_server(net, slist->servers[i].server,
|
|
afs_server_trace_unuse_slist);
|
|
kfree_rcu(slist, rcu);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Build a server list from a VLDB record.
|
|
*/
|
|
struct afs_server_list *afs_alloc_server_list(struct afs_volume *volume,
|
|
struct key *key,
|
|
struct afs_vldb_entry *vldb)
|
|
{
|
|
struct afs_server_list *slist;
|
|
struct afs_server *server;
|
|
unsigned int type_mask = 1 << volume->type;
|
|
bool use_newrepsites = false;
|
|
int ret = -ENOMEM, nr_servers = 0, newrep = 0, i, j, usable = 0;
|
|
|
|
/* Work out if we're going to restrict to NEWREPSITE-marked servers or
|
|
* not. If at least one site is marked as NEWREPSITE, then it's likely
|
|
* that "vos release" is busy updating RO sites. We cut over from one
|
|
* to the other when >=50% of the sites have been updated. Sites that
|
|
* are in the process of being updated are marked DONTUSE.
|
|
*/
|
|
for (i = 0; i < vldb->nr_servers; i++) {
|
|
if (!(vldb->fs_mask[i] & type_mask))
|
|
continue;
|
|
nr_servers++;
|
|
if (vldb->vlsf_flags[i] & AFS_VLSF_DONTUSE)
|
|
continue;
|
|
usable++;
|
|
if (vldb->vlsf_flags[i] & AFS_VLSF_NEWREPSITE)
|
|
newrep++;
|
|
}
|
|
|
|
slist = kzalloc(struct_size(slist, servers, nr_servers), GFP_KERNEL);
|
|
if (!slist)
|
|
goto error;
|
|
|
|
if (newrep) {
|
|
if (newrep < usable / 2) {
|
|
slist->ro_replicating = AFS_RO_REPLICATING_USE_OLD;
|
|
} else {
|
|
slist->ro_replicating = AFS_RO_REPLICATING_USE_NEW;
|
|
use_newrepsites = true;
|
|
}
|
|
}
|
|
|
|
refcount_set(&slist->usage, 1);
|
|
rwlock_init(&slist->lock);
|
|
|
|
/* Make sure a records exists for each server in the list. */
|
|
for (i = 0; i < vldb->nr_servers; i++) {
|
|
unsigned long se_flags = 0;
|
|
bool newrepsite = vldb->vlsf_flags[i] & AFS_VLSF_NEWREPSITE;
|
|
|
|
if (!(vldb->fs_mask[i] & type_mask))
|
|
continue;
|
|
if (vldb->vlsf_flags[i] & AFS_VLSF_DONTUSE)
|
|
__set_bit(AFS_SE_EXCLUDED, &se_flags);
|
|
if (newrep && (newrepsite ^ use_newrepsites))
|
|
__set_bit(AFS_SE_EXCLUDED, &se_flags);
|
|
|
|
server = afs_lookup_server(volume->cell, key, &vldb->fs_server[i],
|
|
vldb->addr_version[i]);
|
|
if (IS_ERR(server)) {
|
|
ret = PTR_ERR(server);
|
|
if (ret == -ENOENT ||
|
|
ret == -ENOMEDIUM)
|
|
continue;
|
|
goto error_2;
|
|
}
|
|
|
|
/* Insertion-sort by UUID */
|
|
for (j = 0; j < slist->nr_servers; j++)
|
|
if (memcmp(&slist->servers[j].server->uuid,
|
|
&server->uuid,
|
|
sizeof(server->uuid)) >= 0)
|
|
break;
|
|
if (j < slist->nr_servers) {
|
|
if (slist->servers[j].server == server) {
|
|
afs_unuse_server_notime(volume->cell->net, server,
|
|
afs_server_trace_unuse_slist_isort);
|
|
continue;
|
|
}
|
|
|
|
memmove(slist->servers + j + 1,
|
|
slist->servers + j,
|
|
(slist->nr_servers - j) * sizeof(struct afs_server_entry));
|
|
}
|
|
|
|
slist->servers[j].server = server;
|
|
slist->servers[j].volume = volume;
|
|
slist->servers[j].flags = se_flags;
|
|
slist->servers[j].cb_expires_at = AFS_NO_CB_PROMISE;
|
|
slist->nr_servers++;
|
|
}
|
|
|
|
if (slist->nr_servers == 0) {
|
|
ret = -EDESTADDRREQ;
|
|
goto error_2;
|
|
}
|
|
|
|
return slist;
|
|
|
|
error_2:
|
|
afs_put_serverlist(volume->cell->net, slist);
|
|
error:
|
|
return ERR_PTR(ret);
|
|
}
|
|
|
|
/*
|
|
* Copy the annotations from an old server list to its potential replacement.
|
|
*/
|
|
bool afs_annotate_server_list(struct afs_server_list *new,
|
|
struct afs_server_list *old)
|
|
{
|
|
unsigned long mask = 1UL << AFS_SE_EXCLUDED;
|
|
int i;
|
|
|
|
if (old->nr_servers != new->nr_servers ||
|
|
old->ro_replicating != new->ro_replicating)
|
|
goto changed;
|
|
|
|
for (i = 0; i < old->nr_servers; i++) {
|
|
if (old->servers[i].server != new->servers[i].server)
|
|
goto changed;
|
|
if ((old->servers[i].flags & mask) != (new->servers[i].flags & mask))
|
|
goto changed;
|
|
}
|
|
return false;
|
|
changed:
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Attach a volume to the servers it is going to use.
|
|
*/
|
|
void afs_attach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *slist)
|
|
{
|
|
struct afs_server_entry *se, *pe;
|
|
struct afs_server *server;
|
|
struct list_head *p;
|
|
unsigned int i;
|
|
|
|
down_write(&volume->cell->vs_lock);
|
|
|
|
for (i = 0; i < slist->nr_servers; i++) {
|
|
se = &slist->servers[i];
|
|
server = se->server;
|
|
|
|
list_for_each(p, &server->volumes) {
|
|
pe = list_entry(p, struct afs_server_entry, slink);
|
|
if (volume->vid <= pe->volume->vid)
|
|
break;
|
|
}
|
|
list_add_tail(&se->slink, p);
|
|
}
|
|
|
|
slist->attached = true;
|
|
up_write(&volume->cell->vs_lock);
|
|
}
|
|
|
|
/*
|
|
* Reattach a volume to the servers it is going to use when server list is
|
|
* replaced. We try to switch the attachment points to avoid rewalking the
|
|
* lists.
|
|
*/
|
|
void afs_reattach_volume_to_servers(struct afs_volume *volume, struct afs_server_list *new,
|
|
struct afs_server_list *old)
|
|
{
|
|
unsigned int n = 0, o = 0;
|
|
|
|
down_write(&volume->cell->vs_lock);
|
|
|
|
while (n < new->nr_servers || o < old->nr_servers) {
|
|
struct afs_server_entry *pn = n < new->nr_servers ? &new->servers[n] : NULL;
|
|
struct afs_server_entry *po = o < old->nr_servers ? &old->servers[o] : NULL;
|
|
struct afs_server_entry *s;
|
|
struct list_head *p;
|
|
int diff;
|
|
|
|
if (pn && po && pn->server == po->server) {
|
|
pn->cb_expires_at = po->cb_expires_at;
|
|
list_replace(&po->slink, &pn->slink);
|
|
n++;
|
|
o++;
|
|
continue;
|
|
}
|
|
|
|
if (pn && po)
|
|
diff = memcmp(&pn->server->uuid, &po->server->uuid,
|
|
sizeof(pn->server->uuid));
|
|
else
|
|
diff = pn ? -1 : 1;
|
|
|
|
if (diff < 0) {
|
|
list_for_each(p, &pn->server->volumes) {
|
|
s = list_entry(p, struct afs_server_entry, slink);
|
|
if (volume->vid <= s->volume->vid)
|
|
break;
|
|
}
|
|
list_add_tail(&pn->slink, p);
|
|
n++;
|
|
} else {
|
|
list_del(&po->slink);
|
|
o++;
|
|
}
|
|
}
|
|
|
|
up_write(&volume->cell->vs_lock);
|
|
}
|
|
|
|
/*
|
|
* Detach a volume from the servers it has been using.
|
|
*/
|
|
void afs_detach_volume_from_servers(struct afs_volume *volume, struct afs_server_list *slist)
|
|
{
|
|
unsigned int i;
|
|
|
|
if (!slist->attached)
|
|
return;
|
|
|
|
down_write(&volume->cell->vs_lock);
|
|
|
|
for (i = 0; i < slist->nr_servers; i++)
|
|
list_del(&slist->servers[i].slink);
|
|
|
|
slist->attached = false;
|
|
up_write(&volume->cell->vs_lock);
|
|
}
|