smarter "clean remote files"

this should (optionally) skip in-use files, and files that have been
seen recently
This commit is contained in:
dakkar 2025-06-24 15:34:13 +01:00
parent a4c0ef824c
commit c68b8d6e7c
3 changed files with 49 additions and 19 deletions

View file

@ -684,8 +684,11 @@ export class QueueService {
}
@bindThis
public createCleanRemoteFilesJob() {
return this.objectStorageQueue.add('cleanRemoteFiles', {}, {
public createCleanRemoteFilesJob(olderThanSeconds: number = 0, keepFilesInUse: boolean = false) {
return this.objectStorageQueue.add('cleanRemoteFiles', {
keepFilesInUse,
olderThanSeconds,
}, {
removeOnComplete: {
age: 3600 * 24 * 7, // keep up to 7 days
count: 30,

View file

@ -4,14 +4,17 @@
*/
import { Inject, Injectable } from '@nestjs/common';
import { IsNull, MoreThan, Not } from 'typeorm';
import { IsNull, MoreThan, Not, Brackets } from 'typeorm';
import { DI } from '@/di-symbols.js';
import type { MiDriveFile, DriveFilesRepository } from '@/models/_.js';
import { MiUser } from '@/models/_.js';
import type Logger from '@/logger.js';
import { DriveService } from '@/core/DriveService.js';
import { bindThis } from '@/decorators.js';
import { QueueLoggerService } from '../QueueLoggerService.js';
import type * as Bull from 'bullmq';
import type { CleanRemoteFilesJobData } from '../types.js';
import { IdService } from '@/core/IdService.js';
@Injectable()
export class CleanRemoteFilesProcessorService {
@ -23,35 +26,54 @@ export class CleanRemoteFilesProcessorService {
private driveService: DriveService,
private queueLoggerService: QueueLoggerService,
private idService: IdService,
) {
this.logger = this.queueLoggerService.logger.createSubLogger('clean-remote-files');
}
@bindThis
public async process(job: Bull.Job<Record<string, unknown>>): Promise<void> {
public async process(job: Bull.Job<CleanRemoteFilesJobData>): Promise<void> {
this.logger.info('Deleting cached remote files...');
const olderThanTimestamp = Date.now() - job.data.olderThanSeconds * 1000;
const olderThanDate = new Date(olderThanTimestamp);
const keepFilesInUse = job.data.keepFilesInUse;
let deletedCount = 0;
let cursor: MiDriveFile['id'] | null = null;
let errorCount = 0;
const total = await this.driveFilesRepository.countBy({
userHost: Not(IsNull()),
isLink: false,
});
const filesQuery = this.driveFilesRepository.createQueryBuilder('file')
.where('file.userHost IS NOT NULL') // remote files
.andWhere('file.isLink = FALSE') // cached
.andWhere('file.id <= :id', { id: this.idService.gen(olderThanTimestamp) }) // and old
.orderBy('file.id', 'ASC');
if (keepFilesInUse) {
filesQuery
// are they used as avatar&&c?
.leftJoinAndSelect(
MiUser, 'fileuser',
'fileuser."avatarId"="file"."id" OR fileuser."bannerId"="file"."id" OR fileuser."backgroundId"="file"."id"'
)
.andWhere(
new Brackets((qb) => {
qb.where('fileuser.id IS NULL') // not used
.orWhere( // or attached to a user
new Brackets((qb) => {
qb.where('fileuser.lastFetchedAt IS NOT NULL') // weird? maybe this only applies to local users
.andWhere('fileuser.lastFetchedAt < :old', { old: olderThanDate }); // old user
})
);
})
);
}
const total = await filesQuery.clone().getCount();
while (true) {
const files = await this.driveFilesRepository.find({
where: {
userHost: Not(IsNull()),
isLink: false,
...(cursor ? { id: MoreThan(cursor) } : {}),
},
take: 256,
order: {
id: 1,
},
});
const thisBatchQuery = filesQuery.clone();
if (cursor) thisBatchQuery.andWhere('file.id > :cursor', { cursor });
const files = await thisBatchQuery.take(256).getMany();
if (files.length === 0) {
job.updateProgress(100);

View file

@ -40,6 +40,11 @@ export type RelationshipJobData = {
withReplies?: boolean;
};
export type CleanRemoteFilesJobData = {
keepFilesInUse: boolean;
olderThanSeconds: number;
};
export type DbJobData<T extends keyof DbJobMap> = DbJobMap[T];
export type DbJobMap = {