Skip to content

Hitless upgrades #3021

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 25 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 78 additions & 27 deletions packages/client/lib/client/commands-queue.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import { SinglyLinkedList, DoublyLinkedNode, DoublyLinkedList } from './linked-list';
import { DoublyLinkedNode, DoublyLinkedList, EmptyAwareSinglyLinkedList } from './linked-list';
import encodeCommand from '../RESP/encoder';
import { Decoder, PUSH_TYPE_MAPPING, RESP_TYPES } from '../RESP/decoder';
import { TypeMapping, ReplyUnion, RespVersions, RedisArgument } from '../RESP/types';
import { ChannelListeners, PubSub, PubSubCommand, PubSubListener, PubSubType, PubSubTypeListeners } from './pub-sub';
import { AbortError, ErrorReply, TimeoutError } from '../errors';
import { AbortError, ErrorReply, CommandTimeoutDuringMaintananceError, TimeoutError } from '../errors';
import { MonitorCallback } from '.';

export interface CommandOptions<T = TypeMapping> {
Expand All @@ -30,6 +30,7 @@ export interface CommandToWrite extends CommandWaitingForReply {
timeout: {
signal: AbortSignal;
listener: () => unknown;
originalTimeout: number | undefined;
} | undefined;
}

Expand All @@ -50,22 +51,70 @@ const RESP2_PUSH_TYPE_MAPPING = {
[RESP_TYPES.SIMPLE_STRING]: Buffer
};

/**
 * A callback that attempts to handle a single push notification.
 *
 * It returns `true` when it consumed the notification and `false` when it
 * did not. The queue relies on that result to decide whether the same
 * notification still has to be offered to another handler.
 */
type PushHandler = (pushItems: any[]) => boolean;

export default class RedisCommandsQueue {
readonly #respVersion;
readonly #maxLength;
readonly #toWrite = new DoublyLinkedList<CommandToWrite>();
readonly #waitingForReply = new SinglyLinkedList<CommandWaitingForReply>();
readonly #waitingForReply = new EmptyAwareSinglyLinkedList<CommandWaitingForReply>();
readonly #onShardedChannelMoved;
#chainInExecution: symbol | undefined;
readonly decoder;
readonly #pubSub = new PubSub();

#pushHandlers: PushHandler[] = [this.#onPush.bind(this)];

// Whether the queue currently considers itself to be in maintenance mode.
// Timeout listeners read this flag when they fire to pick the error type
// used to reject the command.
#inMaintenance = false;

/**
 * Turns maintenance mode on or off for this queue.
 */
set inMaintenance(enabled: boolean) {
  this.#inMaintenance = enabled;
}

// Timeout (in ms) that overrides per-command timeouts while it is set.
#maintenanceCommandTimeout: number | undefined;

/**
 * Sets (or clears, when `ms` is `undefined`) the maintenance command timeout
 * and re-arms the timeout of every command still waiting in the write queue.
 *
 * For each queued command the current timeout listener is detached and a
 * fresh timer is started from now, using the maintenance timeout when one is
 * set and the command's own original timeout otherwise. A falsy maintenance
 * timeout (`undefined` or `0`) is treated as "not set", matching how
 * `addCommand` picks a timeout for new commands.
 *
 * @param ms maintenance timeout in milliseconds, or `undefined` to go back
 *           to the commands' original timeouts.
 */
setMaintenanceCommandTimeout(ms: number | undefined) {
  // Prevent possible api misuse
  if (this.#maintenanceCommandTimeout === ms) return;

  this.#maintenanceCommandTimeout = ms;

  // Overwrite timeouts of all eligible toWrite commands
  this.#toWrite.forEachNode(node => {
    const command = node.value;
    // Read the original timeout before the timeout object is replaced.
    const originalTimeout = command.timeout?.originalTimeout;

    // Remove timeout listener if it exists
    if (command.timeout !== undefined) {
      RedisCommandsQueue.#removeTimeoutListener(command);
    }

    // `||` (not `??`) so that a maintenance timeout of 0 falls back to the
    // command's own timeout instead of silently dropping it.
    const newTimeout = this.#maintenanceCommandTimeout || originalTimeout;

    // No maintenance timeout and the command never had one of its own:
    // leave the command without any timeout rather than keeping a stale,
    // already-detached timeout object around.
    if (!newTimeout) {
      command.timeout = undefined;
      return;
    }

    // Overwrite the command's timeout
    const signal = AbortSignal.timeout(newTimeout);
    command.timeout = {
      signal,
      listener: () => {
        this.#toWrite.remove(node);
        // The maintenance flag is read when the timer fires, not when it is armed.
        command.reject(this.#inMaintenance ? new CommandTimeoutDuringMaintananceError(newTimeout) : new TimeoutError());
      },
      originalTimeout
    };
    signal.addEventListener('abort', command.timeout.listener, { once: true });
  });
}

get isPubSubActive() {
return this.#pubSub.isActive;
}

#invalidateCallback?: (key: RedisArgument | null) => unknown;

constructor(
respVersion: RespVersions,
maxLength: number | null | undefined,
Expand Down Expand Up @@ -107,6 +156,7 @@ export default class RedisCommandsQueue {
}
return true;
}
return false
}

#getTypeMapping() {
Expand All @@ -119,30 +169,27 @@ export default class RedisCommandsQueue {
onErrorReply: err => this.#onErrorReply(err),
//TODO: we can shave off a few cycles by not adding onPush handler at all if CSC is not used
onPush: push => {
if (!this.#onPush(push)) {
// currently only supporting "invalidate" over RESP3 push messages
switch (push[0].toString()) {
case "invalidate": {
if (this.#invalidateCallback) {
if (push[1] !== null) {
for (const key of push[1]) {
this.#invalidateCallback(key);
}
} else {
this.#invalidateCallback(null);
}
}
break;
}
}
for(const pushHandler of this.#pushHandlers) {
if(pushHandler(push)) return
}
},
getTypeMapping: () => this.#getTypeMapping()
});
}

setInvalidateCallback(callback?: (key: RedisArgument | null) => unknown) {
this.#invalidateCallback = callback;
/**
 * Registers an additional push-notification handler.
 *
 * The handler is appended to the end of the handler list, so handlers that
 * were registered earlier are offered each notification first.
 */
addPushHandler(handler: PushHandler): void {
  const handlers = this.#pushHandlers;
  handlers.push(handler);
}

/**
 * Resolves once there are no commands left waiting for a reply.
 *
 * Resolves immediately when the waiting-for-reply list is already empty;
 * otherwise resolves the next time the list emits its `empty` event.
 */
async waitForInflightCommandsToComplete(): Promise<void> {
  // In-flight commands already completed
  if (this.#waitingForReply.length === 0) {
    return;
  }
  // Otherwise wait for in-flight commands to fire `empty` event.
  // `once` (instead of `on`) so the listener is dropped after it fires and
  // repeated calls do not accumulate listeners on the emitter.
  return new Promise<void>(resolve => {
    this.#waitingForReply.events.once('empty', () => resolve());
  });
}

addCommand<T>(
Expand All @@ -168,15 +215,19 @@ export default class RedisCommandsQueue {
typeMapping: options?.typeMapping
};

const timeout = options?.timeout;
// If #maintenanceCommandTimeout was explicitly set, we should
// use it instead of the timeout provided by the command
const timeout = this.#maintenanceCommandTimeout || options?.timeout
if (timeout) {

const signal = AbortSignal.timeout(timeout);
value.timeout = {
signal,
listener: () => {
this.#toWrite.remove(node);
value.reject(new TimeoutError());
}
value.reject(this.#inMaintenance ? new CommandTimeoutDuringMaintananceError(timeout) : new TimeoutError());
},
originalTimeout: options?.timeout
};
signal.addEventListener('abort', value.timeout.listener, { once: true });
}
Expand Down Expand Up @@ -432,7 +483,7 @@ export default class RedisCommandsQueue {
}

/**
 * Detaches the command's timeout listener from its abort signal.
 * Does nothing when the command has no timeout.
 */
static #removeTimeoutListener(command: CommandToWrite) {
  const timeout = command.timeout;
  // Commands without a timeout have nothing to detach; bail out instead of
  // dereferencing `undefined`.
  if (timeout === undefined) return;
  timeout.signal.removeEventListener('abort', timeout.listener);
}

static #flushToWrite(toBeSent: CommandToWrite, err: Error) {
Expand Down
120 changes: 120 additions & 0 deletions packages/client/lib/client/enterprise-maintenance-manager.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import EventEmitter from "events";
import { RedisClientOptions } from ".";
import RedisCommandsQueue from "./commands-queue";
import RedisSocket from "./socket";

/**
 * Names of the events emitted by `EnterpriseMaintenanceManager`.
 */
export const MAINTENANCE_EVENTS = {
  /** Emitted (without payload) when a MOVING notification starts being handled. */
  PAUSE_WRITING: "pause-writing",
  /** Emitted with the newly connected socket once the move has completed. */
  RESUME_WRITING: "resume-writing",
  /** Emitted with a `SocketTimeoutUpdate` payload when maintenance starts or ends. */
  TIMEOUTS_UPDATE: "timeouts-update",
} as const;

/**
 * Push-notification names (first element of the push message) that the
 * maintenance manager reacts to.
 *
 * Declared `as const`, like `MAINTENANCE_EVENTS`, so the values keep their
 * literal types and the table cannot be reassigned by accident.
 */
const PN = {
  MOVING: "MOVING",
  MIGRATING: "MIGRATING",
  MIGRATED: "MIGRATED",
  FAILING_OVER: "FAILING_OVER",
  FAILED_OVER: "FAILED_OVER",
} as const;

/**
 * Payload of the `timeouts-update` maintenance event.
 */
export interface SocketTimeoutUpdate {
  /** `true` when maintenance is starting, `false` when it has ended. */
  inMaintenance: boolean;
  /** Socket timeout to apply; left `undefined` when maintenance ends. */
  timeout?: number;
}

/**
 * Listens for maintenance push notifications (MOVING, MIGRATING, MIGRATED,
 * FAILING_OVER, FAILED_OVER) on a commands queue and translates them into
 * queue timeout changes and `MAINTENANCE_EVENTS` emitted on this object.
 *
 * If handling a MOVING notification fails, the failure is emitted as an
 * `error` event on this emitter.
 */
export default class EnterpriseMaintenanceManager extends EventEmitter {
  readonly #commandsQueue: RedisCommandsQueue;
  readonly #options: RedisClientOptions;

  /**
   * @param commandsQueue queue whose push notifications are observed and
   *                      whose command timeouts are adjusted.
   * @param options client options; `socket` and `gracefulMaintenance` are read.
   */
  constructor(commandsQueue: RedisCommandsQueue, options: RedisClientOptions) {
    super();
    this.#commandsQueue = commandsQueue;
    this.#options = options;

    this.#commandsQueue.addPushHandler(this.#onPush);
  }

  /**
   * Splits a `host:port` endpoint at its LAST colon so that hosts which
   * themselves contain colons keep their full value. Without any colon the
   * whole string is the host and the port is `NaN`.
   *
   * NOTE(review): surrounding brackets (e.g. `[::1]`) are kept as part of the
   * host — confirm the endpoint format the server actually sends.
   */
  static #parseEndpoint(endpoint: string): { host: string; port: number } {
    const separator = endpoint.lastIndexOf(":");
    if (separator === -1) {
      return { host: endpoint, port: NaN };
    }
    return {
      host: endpoint.slice(0, separator),
      port: Number(endpoint.slice(separator + 1)),
    };
  }

  /**
   * Push handler registered on the commands queue.
   *
   * @returns `true` when the notification was one of the maintenance
   *          notifications handled here, `false` otherwise.
   */
  #onPush = (push: Array<any>): boolean => {
    switch (push[0].toString()) {
      case PN.MOVING: {
        const [, afterMs, url] = push;
        const { host, port } = EnterpriseMaintenanceManager.#parseEndpoint(url.toString());
        // The move is handled in the background; route a failure to the
        // `error` event instead of leaving the promise floating.
        this.#onMoving(afterMs, host, port).catch((err: unknown) => {
          this.emit("error", err);
        });
        return true;
      }
      case PN.MIGRATING:
      case PN.FAILING_OVER: {
        this.#onMigrating();
        return true;
      }
      case PN.MIGRATED:
      case PN.FAILED_OVER: {
        this.#onMigrated();
        return true;
      }
    }
    return false;
  };

  // Queue:
  //   toWrite [ C D E ]
  //   waitingForReply [ A B ] - aka In-flight commands
  //
  // time: ---1-2---3-4-5-6---------------------------
  //
  // 1. [EVENT] MOVING PN received
  // 2. [ACTION] Pause writing ( we need to wait for new socket to connect and for all in-flight commands to complete )
  // 3. [EVENT] New socket connected
  // 4. [EVENT] In-flight commands completed
  // 5. [ACTION] Destroy old socket
  // 6. [ACTION] Resume writing -> we are going to write to the new socket from now on
  //
  // NOTE(review): if connecting the new socket fails, PAUSE_WRITING has
  // already been emitted and no RESUME_WRITING follows — confirm how the
  // listener is expected to recover.
  #onMoving = async (
    _afterMs: number,
    host: string,
    port: number,
  ): Promise<void> => {
    // 1 [EVENT] MOVING PN received
    // 2 [ACTION] Pause writing
    this.emit(MAINTENANCE_EVENTS.PAUSE_WRITING);
    this.#onMigrating();

    const newSocket = new RedisSocket({
      ...this.#options.socket,
      host,
      port,
    });
    //todo
    newSocket.setMaintenanceTimeout();
    await newSocket.connect();
    // 3 [EVENT] New socket connected

    await this.#commandsQueue.waitForInflightCommandsToComplete();
    // 4 [EVENT] In-flight commands completed

    // 5 + 6
    this.emit(MAINTENANCE_EVENTS.RESUME_WRITING, newSocket);
    this.#onMigrated();
  };

  /**
   * Enters maintenance mode: flags the queue, applies the relaxed command
   * timeout from the options and announces the relaxed socket timeout.
   * Synchronous, so an exception propagates to the caller.
   */
  #onMigrating = (): void => {
    this.#commandsQueue.inMaintenance = true;
    this.#commandsQueue.setMaintenanceCommandTimeout(
      this.#options.gracefulMaintenance?.relaxedCommandTimeout,
    );

    this.emit(MAINTENANCE_EVENTS.TIMEOUTS_UPDATE, {
      inMaintenance: true,
      timeout: this.#options.gracefulMaintenance?.relaxedSocketTimeout
    } satisfies SocketTimeoutUpdate);
  };

  /**
   * Leaves maintenance mode: clears the queue flag and maintenance command
   * timeout and announces that the socket timeout override is gone.
   * Synchronous, so an exception propagates to the caller.
   */
  #onMigrated = (): void => {
    this.#commandsQueue.inMaintenance = false;
    this.#commandsQueue.setMaintenanceCommandTimeout(undefined);

    this.emit(MAINTENANCE_EVENTS.TIMEOUTS_UPDATE, {
      inMaintenance: false,
      timeout: undefined
    } satisfies SocketTimeoutUpdate);
  };
}
Loading
Loading