From 629ba2b8d4f7566526efffb77080a4bc82bdd05e Mon Sep 17 00:00:00 2001 From: Boki Date: Wed, 18 Jun 2025 22:53:40 -0400 Subject: [PATCH] cleanup --- libs/data-frame/package.json | 33 --- libs/data-frame/src/index.ts | 501 ---------------------------------- libs/data-frame/tsconfig.json | 7 - 3 files changed, 541 deletions(-) delete mode 100644 libs/data-frame/package.json delete mode 100644 libs/data-frame/src/index.ts delete mode 100644 libs/data-frame/tsconfig.json diff --git a/libs/data-frame/package.json b/libs/data-frame/package.json deleted file mode 100644 index 3048a03..0000000 --- a/libs/data-frame/package.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "name": "@stock-bot/data-frame", - "version": "1.0.0", - "description": "DataFrame library for time series data manipulation", - "main": "dist/index.js", - "types": "dist/index.d.ts", - "type": "module", - "scripts": { - "build": "tsc", - "test": "bun test", - "clean": "rimraf dist" - }, - "dependencies": { - "@stock-bot/logger": "*", - "@stock-bot/utils": "*" - }, - "devDependencies": { - "@types/node": "^20.11.0", - "typescript": "^5.3.0", - "bun-types": "^1.2.15" - }, - "exports": { - ".": { - "import": "./dist/index.js", - "require": "./dist/index.js", - "types": "./dist/index.d.ts" - } - }, - "files": [ - "dist", - "README.md" - ] -} diff --git a/libs/data-frame/src/index.ts b/libs/data-frame/src/index.ts deleted file mode 100644 index 17ec275..0000000 --- a/libs/data-frame/src/index.ts +++ /dev/null @@ -1,501 +0,0 @@ -import { getLogger } from '@stock-bot/logger'; - -export interface DataFrameRow { - [key: string]: any; -} - -export interface DataFrameOptions { - index?: string; - columns?: string[]; - dtypes?: Record; -} - -export interface GroupByResult { - [key: string]: DataFrame; -} - -export interface AggregationFunction { - (values: any[]): any; -} - -export class DataFrame { - private data: DataFrameRow[]; - private _columns: string[]; - private _index: string; - private _dtypes: Record; - private logger = getLogger('dataframe'); - - constructor(data: DataFrameRow[] = [], options: DataFrameOptions = {}) { - this.data = [...data]; - this._index = options.index || 'index'; - this._columns = options.columns || this.inferColumns(); - this._dtypes = options.dtypes || {}; - - this.validateAndCleanData(); - } - - private inferColumns(): string[] { - if (this.data.length === 0) { - return []; - } - - const columns = new Set(); - for (const row of this.data) { - Object.keys(row).forEach(key => columns.add(key)); - } - - return Array.from(columns).sort(); - } - - private validateAndCleanData(): void { - if (this.data.length === 0) { - return; - } - - // Ensure all rows have the same columns - for (let i = 0; i < this.data.length; i++) { - const row = this.data[i]; - - // Add missing columns with null values - for (const col of this._columns) { - if (!(col in row)) { - row[col] = null; - } - } - - // Apply data type conversions - for (const [col, dtype] of Object.entries(this._dtypes)) { - if (col in row && row[col] !== null) { - row[col] = this.convertValue(row[col], dtype); - } - } - } - } - - private convertValue(value: any, dtype: string): any { - switch (dtype) { - case 'number': - return typeof value === 'number' ? value : parseFloat(value); - case 'string': - return String(value); - case 'boolean': - return Boolean(value); - case 'date': - return value instanceof Date ? value : new Date(value); - default: - return value; - } - } - - // Basic properties - get columns(): string[] { - return [...this._columns]; - } - - get index(): string { - return this._index; - } - - get length(): number { - return this.data.length; - } - - get shape(): [number, number] { - return [this.data.length, this._columns.length]; - } - - get empty(): boolean { - return this.data.length === 0; - } - - // Data access methods - head(n: number = 5): DataFrame { - return new DataFrame(this.data.slice(0, n), { - columns: this._columns, - index: this._index, - dtypes: this._dtypes, - }); - } - - tail(n: number = 5): DataFrame { - return new DataFrame(this.data.slice(-n), { - columns: this._columns, - index: this._index, - dtypes: this._dtypes, - }); - } - - iloc(start: number, end?: number): DataFrame { - const slice = end !== undefined ? this.data.slice(start, end) : this.data.slice(start); - return new DataFrame(slice, { - columns: this._columns, - index: this._index, - dtypes: this._dtypes, - }); - } - - at(index: number, column: string): any { - if (index < 0 || index >= this.data.length) { - throw new Error(`Index ${index} out of bounds`); - } - return this.data[index][column]; - } - - // Column operations - select(columns: string[]): DataFrame { - const validColumns = columns.filter(col => this._columns.includes(col)); - const newData = this.data.map(row => { - const newRow: DataFrameRow = {}; - for (const col of validColumns) { - newRow[col] = row[col]; - } - return newRow; - }); - - return new DataFrame(newData, { - columns: validColumns, - index: this._index, - dtypes: this.filterDtypes(validColumns), - }); - } - - drop(columns: string[]): DataFrame { - const remainingColumns = this._columns.filter(col => !columns.includes(col)); - return this.select(remainingColumns); - } - - getColumn(column: string): any[] { - if (!this._columns.includes(column)) { - throw new Error(`Column '${column}' not found`); - } - return this.data.map(row => row[column]); - } - - setColumn(column: string, values: any[]): DataFrame { - if (values.length !== this.data.length) { - throw new Error('Values length must match DataFrame length'); - } - - const newData = this.data.map((row, index) => ({ - ...row, - [column]: values[index], - })); - - const newColumns = this._columns.includes(column) ? this._columns : [...this._columns, column]; - - return new DataFrame(newData, { - columns: newColumns, - index: this._index, - dtypes: this._dtypes, - }); - } - - // Filtering - filter(predicate: (row: DataFrameRow, index: number) => boolean): DataFrame { - const filteredData = this.data.filter(predicate); - return new DataFrame(filteredData, { - columns: this._columns, - index: this._index, - dtypes: this._dtypes, - }); - } - - where(column: string, operator: '>' | '<' | '>=' | '<=' | '==' | '!=', value: any): DataFrame { - return this.filter(row => { - const cellValue = row[column]; - switch (operator) { - case '>': - return cellValue > value; - case '<': - return cellValue < value; - case '>=': - return cellValue >= value; - case '<=': - return cellValue <= value; - case '==': - return cellValue === value; - case '!=': - return cellValue !== value; - default: - return false; - } - }); - } - - // Sorting - sort(column: string, ascending: boolean = true): DataFrame { - const sortedData = [...this.data].sort((a, b) => { - const aVal = a[column]; - const bVal = b[column]; - - if (aVal === bVal) { - return 0; - } - - const comparison = aVal > bVal ? 1 : -1; - return ascending ? comparison : -comparison; - }); - - return new DataFrame(sortedData, { - columns: this._columns, - index: this._index, - dtypes: this._dtypes, - }); - } - - // Aggregation - groupBy(column: string): GroupByResult { - const groups: Record = {}; - - for (const row of this.data) { - const key = String(row[column]); - if (!groups[key]) { - groups[key] = []; - } - groups[key].push(row); - } - - const result: GroupByResult = {}; - for (const [key, rows] of Object.entries(groups)) { - result[key] = new DataFrame(rows, { - columns: this._columns, - index: this._index, - dtypes: this._dtypes, - }); - } - - return result; - } - - agg(aggregations: Record): DataFrameRow { - const result: DataFrameRow = {}; - - for (const [column, func] of Object.entries(aggregations)) { - if (!this._columns.includes(column)) { - throw new Error(`Column '${column}' not found`); - } - - const values = this.getColumn(column).filter(val => val !== null && val !== undefined); - result[column] = func(values); - } - - return result; - } - - // Statistical methods - mean(column: string): number { - const values = this.getColumn(column).filter(val => typeof val === 'number'); - return values.reduce((sum, val) => sum + val, 0) / values.length; - } - - sum(column: string): number { - const values = this.getColumn(column).filter(val => typeof val === 'number'); - return values.reduce((sum, val) => sum + val, 0); - } - - min(column: string): number { - const values = this.getColumn(column).filter(val => typeof val === 'number'); - return Math.min(...values); - } - - max(column: string): number { - const values = this.getColumn(column).filter(val => typeof val === 'number'); - return Math.max(...values); - } - - std(column: string): number { - const values = this.getColumn(column).filter(val => typeof val === 'number'); - const mean = values.reduce((sum, val) => sum + val, 0) / values.length; - const variance = values.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) / values.length; - return Math.sqrt(variance); - } - - // Time series specific methods - resample(timeColumn: string, frequency: string): DataFrame { - // Simple resampling implementation - // For production, you'd want more sophisticated time-based grouping - const sorted = this.sort(timeColumn); - - switch (frequency) { - case '1H': - return this.resampleByHour(sorted, timeColumn); - case '1D': - return this.resampleByDay(sorted, timeColumn); - default: - throw new Error(`Unsupported frequency: ${frequency}`); - } - } - - private resampleByHour(sorted: DataFrame, timeColumn: string): DataFrame { - const groups: Record = {}; - - for (const row of sorted.data) { - const date = new Date(row[timeColumn]); - const hourKey = `${date.getFullYear()}-${date.getMonth()}-${date.getDate()}-${date.getHours()}`; - - if (!groups[hourKey]) { - groups[hourKey] = []; - } - groups[hourKey].push(row); - } - - const aggregatedData: DataFrameRow[] = []; - for (const [key, rows] of Object.entries(groups)) { - const tempDf = new DataFrame(rows, { - columns: this._columns, - index: this._index, - dtypes: this._dtypes, - }); - - // Create OHLCV aggregation - const aggregated: DataFrameRow = { - [timeColumn]: rows[0][timeColumn], - open: rows[0].close || rows[0].price, - high: tempDf.max('high') || tempDf.max('close') || tempDf.max('price'), - low: tempDf.min('low') || tempDf.min('close') || tempDf.min('price'), - close: rows[rows.length - 1].close || rows[rows.length - 1].price, - volume: tempDf.sum('volume') || 0, - }; - - aggregatedData.push(aggregated); - } - - return new DataFrame(aggregatedData); - } - - private resampleByDay(sorted: DataFrame, timeColumn: string): DataFrame { - // Similar to resampleByHour but group by day - const groups: Record = {}; - - for (const row of sorted.data) { - const date = new Date(row[timeColumn]); - const dayKey = `${date.getFullYear()}-${date.getMonth()}-${date.getDate()}`; - - if (!groups[dayKey]) { - groups[dayKey] = []; - } - groups[dayKey].push(row); - } - - const aggregatedData: DataFrameRow[] = []; - for (const [key, rows] of Object.entries(groups)) { - const tempDf = new DataFrame(rows, { - columns: this._columns, - index: this._index, - dtypes: this._dtypes, - }); - - const aggregated: DataFrameRow = { - [timeColumn]: rows[0][timeColumn], - open: rows[0].close || rows[0].price, - high: tempDf.max('high') || tempDf.max('close') || tempDf.max('price'), - low: tempDf.min('low') || tempDf.min('close') || tempDf.min('price'), - close: rows[rows.length - 1].close || rows[rows.length - 1].price, - volume: tempDf.sum('volume') || 0, - }; - - aggregatedData.push(aggregated); - } - - return new DataFrame(aggregatedData); - } - - // Utility methods - copy(): DataFrame { - return new DataFrame( - this.data.map(row => ({ ...row })), - { - columns: this._columns, - index: this._index, - dtypes: { ...this._dtypes }, - } - ); - } - - concat(other: DataFrame): DataFrame { - const combinedData = [...this.data, ...other.data]; - const combinedColumns = Array.from(new Set([...this._columns, ...other._columns])); - - return new DataFrame(combinedData, { - columns: combinedColumns, - index: this._index, - dtypes: { ...this._dtypes, ...other._dtypes }, - }); - } - - toArray(): DataFrameRow[] { - return this.data.map(row => ({ ...row })); - } - - toJSON(): string { - return JSON.stringify(this.data); - } - - private filterDtypes( - columns: string[] - ): Record { - const filtered: Record = {}; - for (const col of columns) { - if (this._dtypes[col]) { - filtered[col] = this._dtypes[col]; - } - } - return filtered; - } - - // Display method - toString(): string { - if (this.empty) { - return 'Empty DataFrame'; - } - - const maxRows = 10; - const displayData = this.data.slice(0, maxRows); - - let result = `DataFrame (${this.length} rows x ${this._columns.length} columns)\n`; - result += this._columns.join('\t') + '\n'; - result += '-'.repeat(this._columns.join('\t').length) + '\n'; - - for (const row of displayData) { - const values = this._columns.map(col => String(row[col] ?? 'null')); - result += values.join('\t') + '\n'; - } - - if (this.length > maxRows) { - result += `... (${this.length - maxRows} more rows)\n`; - } - - return result; - } -} - -// Factory functions -export function createDataFrame(data: DataFrameRow[], options?: DataFrameOptions): DataFrame { - return new DataFrame(data, options); -} - -export function readCSV(csvData: string, options?: DataFrameOptions): DataFrame { - const lines = csvData.trim().split('\n'); - if (lines.length === 0) { - return new DataFrame(); - } - - const headers = lines[0].split(',').map(h => h.trim()); - const data: DataFrameRow[] = []; - - for (let i = 1; i < lines.length; i++) { - const values = lines[i].split(',').map(v => v.trim()); - const row: DataFrameRow = {}; - - for (let j = 0; j < headers.length; j++) { - row[headers[j]] = values[j] || null; - } - - data.push(row); - } - - return new DataFrame(data, { - columns: headers, - ...options, - }); -} diff --git a/libs/data-frame/tsconfig.json b/libs/data-frame/tsconfig.json deleted file mode 100644 index a9ca512..0000000 --- a/libs/data-frame/tsconfig.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "extends": "../../tsconfig.lib.json", - "references": [ - { "path": "../logger" }, - { "path": "../utils" } - ] -}