// Listing metadata: 234 lines, 6.3 KiB, TypeScript
import { getLogger } from '@stock-bot/logger';
|
|
import type { HealthStatus, PerformanceMetrics, QueryResult } from './types';
|
|
|
|
/**
 * Structural description of the client operations the health monitor calls.
 *
 * Declared locally (rather than importing the client class) to avoid a
 * circular dependency.
 */
interface QuestDBClientInterface {
  /** Returns whether the client currently considers its pg pool healthy. */
  isPgPoolHealthy(): boolean;

  /** Executes `sql` with optional positional `params` and resolves to the query result. */
  query<T = any>(sql: string, params?: any[]): Promise<QueryResult<T>>;
}
|
|
|
|
/**
 * QuestDB Health Monitor
 *
 * Probes the QuestDB client with a trivial query (on demand and, optionally,
 * on a timer), remembers the outcome of the most recent probe, and aggregates
 * simple per-query performance metrics that callers report via `recordQuery`.
 */
export class QuestDBHealthMonitor {
  /** Maximum age of a probe result before `getHealthStatus` re-probes. */
  private static readonly STATUS_CACHE_TTL_MS = 60000;

  private readonly logger: ReturnType<typeof getLogger>;
  private healthCheckInterval: NodeJS.Timeout | null = null;
  private lastHealthCheck: Date | null = null;
  /** Result of the most recent probe; lets cached reads report failures truthfully. */
  private lastHealthStatus: HealthStatus | null = null;
  private performanceMetrics: PerformanceMetrics = {
    totalQueries: 0,
    successfulQueries: 0,
    failedQueries: 0,
    averageResponseTime: 0,
    lastQueryTime: null,
    connectionUptime: 0,
    memoryUsage: 0,
  };

  /**
   * @param client Client used to issue the probe query and to read pg pool health.
   */
  constructor(private readonly client: QuestDBClientInterface) {
    this.logger = getLogger('questdb-health-monitor');
  }

  /**
   * Start periodic health monitoring.
   *
   * Any monitoring that is already running is stopped first. One probe is
   * kicked off immediately (not awaited), then one every `intervalMs`.
   *
   * @param intervalMs Delay between probes in milliseconds (default 30000).
   */
  public startMonitoring(intervalMs: number = 30000): void {
    if (this.healthCheckInterval) {
      this.stopMonitoring();
    }

    this.logger.info(`Starting health monitoring with ${intervalMs}ms interval`);

    this.healthCheckInterval = setInterval(async () => {
      try {
        await this.performHealthCheck();
      } catch (error) {
        this.logger.error('Health check failed', error);
      }
    }, intervalMs);

    // Perform initial health check
    this.performHealthCheck().catch(error => {
      this.logger.error('Initial health check failed', error);
    });
  }

  /**
   * Stop periodic health monitoring. Does nothing if monitoring is not running.
   */
  public stopMonitoring(): void {
    if (this.healthCheckInterval) {
      clearInterval(this.healthCheckInterval);
      this.healthCheckInterval = null;
      this.logger.info('Health monitoring stopped');
    }
  }

  /**
   * Run one probe (`SELECT 1`) against the client and record the outcome.
   *
   * A failing probe does not reject: the failure is reported through the
   * returned status (`isHealthy: false`, with `error` populated).
   *
   * @returns The status produced by this probe.
   */
  public async performHealthCheck(): Promise<HealthStatus> {
    const startTime = Date.now();

    try {
      // Test basic connectivity with a simple query
      await this.client.query('SELECT 1 as health_check');

      const responseTime = Date.now() - startTime;
      this.lastHealthCheck = new Date();

      const status: HealthStatus = {
        isHealthy: true,
        lastCheck: this.lastHealthCheck,
        responseTime,
        message: 'Connection healthy',
        details: {
          pgPool: this.client.isPgPoolHealthy(),
          httpEndpoint: true, // Will be implemented when HTTP client is added
          uptime: this.getUptime(),
        },
      };

      this.lastHealthStatus = status;
      this.logger.debug('Health check passed', { responseTime });
      return status;
    } catch (error) {
      const responseTime = Date.now() - startTime;
      this.lastHealthCheck = new Date();

      const status: HealthStatus = {
        isHealthy: false,
        lastCheck: this.lastHealthCheck,
        responseTime,
        message: `Health check failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
        error: error instanceof Error ? error : new Error('Unknown error'),
        details: {
          pgPool: false,
          httpEndpoint: false,
          uptime: this.getUptime(),
        },
      };

      this.lastHealthStatus = status;
      this.logger.error('Health check failed', { error, responseTime });
      return status;
    }
  }

  /**
   * Get the current health status, re-probing only when the last probe is
   * missing or older than 60 seconds.
   *
   * When a recent probe exists:
   * - if it failed, a copy of that failing status is returned, so a known
   *   outage is never reported as healthy;
   * - if it succeeded, a lightweight "cached" status is returned with a
   *   freshly read pg pool flag and uptime.
   *
   * @returns A fresh or cached health status.
   */
  public async getHealthStatus(): Promise<HealthStatus> {
    const lastCheck = this.lastHealthCheck;
    const cached = this.lastHealthStatus;

    if (
      !lastCheck ||
      !cached ||
      Date.now() - lastCheck.getTime() > QuestDBHealthMonitor.STATUS_CACHE_TTL_MS
    ) {
      return await this.performHealthCheck();
    }

    // The last probe failed recently: surface that failure instead of
    // claiming the connection is healthy.
    if (!cached.isHealthy) {
      return { ...cached };
    }

    // Return cached status if recent
    return {
      isHealthy: true,
      lastCheck,
      responseTime: 0,
      message: 'Using cached health status',
      details: {
        pgPool: this.client.isPgPoolHealthy(),
        httpEndpoint: true,
        uptime: this.getUptime(),
      },
    };
  }

  /**
   * Record the outcome of one query in the performance metrics.
   *
   * @param success Whether the query succeeded.
   * @param responseTime Duration of the query (same unit as `averageResponseTime`).
   */
  public recordQuery(success: boolean, responseTime: number): void {
    this.performanceMetrics.totalQueries++;
    this.performanceMetrics.lastQueryTime = new Date();

    if (success) {
      this.performanceMetrics.successfulQueries++;
    } else {
      this.performanceMetrics.failedQueries++;
    }

    // Update the running mean: rebuild the previous total from the old
    // average, add this sample, and divide by the new count.
    const totalResponseTime =
      this.performanceMetrics.averageResponseTime * (this.performanceMetrics.totalQueries - 1) +
      responseTime;
    this.performanceMetrics.averageResponseTime =
      totalResponseTime / this.performanceMetrics.totalQueries;

    // Update memory usage
    this.performanceMetrics.memoryUsage = process.memoryUsage().heapUsed;
  }

  /**
   * Get a snapshot of the performance metrics.
   *
   * @returns A shallow copy, so callers cannot mutate the monitor's counters.
   */
  public getPerformanceMetrics(): PerformanceMetrics {
    return { ...this.performanceMetrics };
  }

  /**
   * Get the uptime reported in status details, in whole seconds.
   *
   * This is the Node.js process uptime, used as a stand-in for connection uptime.
   */
  private getUptime(): number {
    return Math.floor(process.uptime());
  }

  /**
   * Reset all counters to zero and refresh the uptime and heap usage readings.
   */
  public resetMetrics(): void {
    this.performanceMetrics = {
      totalQueries: 0,
      successfulQueries: 0,
      failedQueries: 0,
      averageResponseTime: 0,
      lastQueryTime: null,
      connectionUptime: this.getUptime(),
      memoryUsage: process.memoryUsage().heapUsed,
    };

    this.logger.debug('Performance metrics reset');
  }

  /**
   * Get health summary for monitoring dashboards.
   *
   * @returns The current status, a metrics snapshot, and textual
   * recommendations derived from simple thresholds on those metrics.
   */
  public async getHealthSummary(): Promise<{
    status: HealthStatus;
    metrics: PerformanceMetrics;
    recommendations: string[];
  }> {
    const status = await this.getHealthStatus();
    const metrics = this.getPerformanceMetrics();
    const recommendations: string[] = [];

    // Failures exceeding 10% of the successful query count
    if (metrics.failedQueries > metrics.successfulQueries * 0.1) {
      recommendations.push('High error rate detected - check query patterns');
    }

    if (metrics.averageResponseTime > 1000) {
      recommendations.push('High response times - consider query optimization');
    }

    if (metrics.memoryUsage > 100 * 1024 * 1024) {
      // 100MB
      recommendations.push('High memory usage - monitor for memory leaks');
    }

    return {
      status,
      metrics,
      recommendations,
    };
  }

  /**
   * Cleanup resources: stops the monitoring timer if one is running.
   */
  public destroy(): void {
    this.stopMonitoring();
    this.logger.debug('Health monitor destroyed');
  }
}
|