This commit is contained in:
Bojan Kucera 2025-06-04 22:46:01 -04:00
parent 7993148a95
commit 528be93804
38 changed files with 4617 additions and 1081 deletions

View file

@ -0,0 +1,23 @@
{
"name": "@stock-bot/data-frame",
"version": "1.0.0",
"description": "DataFrame library for time series data manipulation",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"scripts": {
"build": "tsc",
"dev": "tsc --watch",
"test": "bun test"
},
"dependencies": {
"@stock-bot/logger": "workspace:*",
"@stock-bot/utils": "workspace:*"
},
"devDependencies": {
"@types/node": "^20.10.0",
"typescript": "^5.3.0"
},
"peerDependencies": {
"bun-types": "*"
}
}

View file

@ -0,0 +1,485 @@
import { createLogger } from '@stock-bot/logger';
/**
 * A single record of a DataFrame: an open-ended map from column name to
 * cell value.
 */
export interface DataFrameRow {
// Any string may be used as a column name; cell values are untyped.
[key: string]: any;
}
/**
 * Optional settings accepted by the DataFrame constructor and the factory
 * functions in this module.
 */
export interface DataFrameOptions {
// Name of the index column; the constructor falls back to 'index' when omitted.
index?: string;
// Explicit column list; when omitted the constructor infers it from the row keys.
columns?: string[];
// Per-column target types; non-null cells of listed columns are converted on construction.
dtypes?: Record<string, 'number' | 'string' | 'boolean' | 'date'>;
}
/**
 * Result of DataFrame.groupBy: one DataFrame per distinct group key.
 */
export interface GroupByResult {
// Keys are the grouping column's values converted with String().
[key: string]: DataFrame;
}
/**
 * Reducer applied to the values of one column by DataFrame.agg.
 */
export interface AggregationFunction {
// DataFrame.agg removes null and undefined entries before calling the function.
(values: any[]): any;
}
/**
 * In-memory, row-oriented table for time series data manipulation.
 *
 * Every operation that returns a DataFrame returns a NEW frame. Rows are
 * copied on construction, so neither the objects passed in by the caller nor
 * the rows of a parent frame are ever mutated.
 */
export class DataFrame {
  private data: DataFrameRow[];
  private _columns: string[];
  private _index: string;
  private _dtypes: Record<string, 'number' | 'string' | 'boolean' | 'date'>;
  private logger = createLogger('dataframe');

  /**
   * @param data Rows to load. Each row is shallow-copied before cleaning.
   * @param options Index name (default `'index'`), explicit columns (default:
   *   sorted union of all row keys) and per-column dtypes.
   */
  constructor(data: DataFrameRow[] = [], options: DataFrameOptions = {}) {
    // Copy each row: validateAndCleanData writes into the rows, and the caller's
    // objects (or a parent frame's rows) must not change under their feet.
    this.data = data.map(row => ({ ...row }));
    this._index = options.index || 'index';
    this._columns = options.columns ? [...options.columns] : this.inferColumns();
    this._dtypes = { ...(options.dtypes || {}) };
    this.validateAndCleanData();
  }

  /** Returns the sorted union of the keys of every row. */
  private inferColumns(): string[] {
    const names = new Set<string>();
    for (const row of this.data) {
      for (const key of Object.keys(row)) {
        names.add(key);
      }
    }
    return Array.from(names).sort();
  }

  /**
   * Fills columns missing from a row with `null` and applies the configured
   * dtype conversions to every cell that holds a value.
   */
  private validateAndCleanData(): void {
    const dtypeEntries = Object.entries(this._dtypes);
    for (const row of this.data) {
      for (const col of this._columns) {
        if (!DataFrame.hasOwn(row, col)) {
          row[col] = null;
        }
      }
      for (const [col, dtype] of dtypeEntries) {
        // `!= null` skips both null and undefined; converting undefined would
        // otherwise yield the string 'undefined', NaN or an invalid Date.
        if (DataFrame.hasOwn(row, col) && row[col] != null) {
          row[col] = this.convertValue(row[col], dtype);
        }
      }
    }
  }

  /** Converts one non-null cell to the requested dtype; unknown dtypes pass through. */
  private convertValue(value: any, dtype: string): any {
    switch (dtype) {
      case 'number':
        return typeof value === 'number' ? value : parseFloat(value);
      case 'string':
        return String(value);
      case 'boolean':
        if (typeof value === 'string') {
          // Textual input (e.g. from CSV): Boolean('false') would be true.
          const text = value.trim().toLowerCase();
          return !(text === '' || text === 'false' || text === '0');
        }
        return Boolean(value);
      case 'date':
        return value instanceof Date ? value : new Date(value);
      default:
        return value;
    }
  }

  /** Own-property test that ignores keys inherited from Object.prototype. */
  private static hasOwn(row: DataFrameRow, key: string): boolean {
    return Object.prototype.hasOwnProperty.call(row, key);
  }

  /** Builds a frame from `rows` that keeps this frame's index and dtypes. */
  private withRows(rows: DataFrameRow[], columns: string[] = this._columns): DataFrame {
    return new DataFrame(rows, {
      columns,
      index: this._index,
      dtypes: this._dtypes
    });
  }

  // Basic properties

  /** Copy of the column names, in frame order. */
  get columns(): string[] {
    return [...this._columns];
  }

  /** Name of the index column. */
  get index(): string {
    return this._index;
  }

  /** Number of rows. */
  get length(): number {
    return this.data.length;
  }

  /** `[rowCount, columnCount]`. */
  get shape(): [number, number] {
    return [this.data.length, this._columns.length];
  }

  /** True when the frame has no rows. */
  get empty(): boolean {
    return this.data.length === 0;
  }

  // Data access methods

  /** First `n` rows (default 5). */
  head(n: number = 5): DataFrame {
    return this.withRows(this.data.slice(0, n));
  }

  /** Last `n` rows (default 5); `tail(0)` is an empty frame. */
  tail(n: number = 5): DataFrame {
    // slice(-0) is slice(0) and would return every row, so handle 0 explicitly.
    return this.withRows(n === 0 ? [] : this.data.slice(-n));
  }

  /** Rows in the half-open positional range `[start, end)`; `end` defaults to the last row. */
  iloc(start: number, end?: number): DataFrame {
    return this.withRows(this.data.slice(start, end));
  }

  /**
   * Value of one cell.
   * @throws Error when `index` is outside `[0, length)`.
   */
  at(index: number, column: string): any {
    if (index < 0 || index >= this.data.length) {
      throw new Error(`Index ${index} out of bounds`);
    }
    return this.data[index][column];
  }

  // Column operations

  /** Frame restricted to the requested columns; unknown names are ignored. */
  select(columns: string[]): DataFrame {
    const validColumns = columns.filter(col => this._columns.includes(col));
    const newData = this.data.map(row => {
      const newRow: DataFrameRow = {};
      for (const col of validColumns) {
        newRow[col] = row[col];
      }
      return newRow;
    });
    return new DataFrame(newData, {
      columns: validColumns,
      index: this._index,
      dtypes: this.filterDtypes(validColumns)
    });
  }

  /** Frame without the listed columns. */
  drop(columns: string[]): DataFrame {
    return this.select(this._columns.filter(col => !columns.includes(col)));
  }

  /**
   * All values of one column, in row order.
   * @throws Error when the column does not exist.
   */
  getColumn(column: string): any[] {
    if (!this._columns.includes(column)) {
      throw new Error(`Column '${column}' not found`);
    }
    return this.data.map(row => row[column]);
  }

  /**
   * Frame with `column` added or replaced by `values` (one value per row).
   * @throws Error when `values.length` differs from the row count.
   */
  setColumn(column: string, values: any[]): DataFrame {
    if (values.length !== this.data.length) {
      throw new Error('Values length must match DataFrame length');
    }
    const newData = this.data.map((row, position) => ({
      ...row,
      [column]: values[position]
    }));
    const newColumns = this._columns.includes(column)
      ? this._columns
      : [...this._columns, column];
    return this.withRows(newData, newColumns);
  }

  // Filtering

  /** Frame holding the rows for which `predicate` returns true. */
  filter(predicate: (row: DataFrameRow, index: number) => boolean): DataFrame {
    return this.withRows(this.data.filter(predicate));
  }

  /** Frame holding the rows whose `column` value satisfies `operator value` (strict equality for `==`/`!=`). */
  where(column: string, operator: '>' | '<' | '>=' | '<=' | '==' | '!=', value: any): DataFrame {
    return this.filter(row => {
      const cellValue = row[column];
      switch (operator) {
        case '>': return cellValue > value;
        case '<': return cellValue < value;
        case '>=': return cellValue >= value;
        case '<=': return cellValue <= value;
        case '==': return cellValue === value;
        case '!=': return cellValue !== value;
        default: return false;
      }
    });
  }

  // Sorting

  /**
   * Frame sorted by `column`. The sort is stable, Date values are compared by
   * timestamp, and null/undefined cells are placed last in both directions.
   */
  sort(column: string, ascending: boolean = true): DataFrame {
    const direction = ascending ? 1 : -1;
    const sortedData = [...this.data].sort((a, b) => {
      const aVal = a[column];
      const bVal = b[column];
      const aMissing = aVal == null;
      const bMissing = bVal == null;
      if (aMissing || bMissing) {
        if (aMissing && bMissing) return 0;
        return aMissing ? 1 : -1;
      }
      return direction * DataFrame.compareValues(aVal, bVal);
    });
    return this.withRows(sortedData);
  }

  /** Three-way comparison that gives a consistent answer for Dates and incomparable values. */
  private static compareValues(a: any, b: any): number {
    // Two distinct Date objects are never `===`, so compare their timestamps.
    const left = a instanceof Date ? a.getTime() : a;
    const right = b instanceof Date ? b.getTime() : b;
    if (left > right) return 1;
    if (left < right) return -1;
    return 0;
  }

  // Aggregation

  /** Splits the frame into one frame per distinct `String(row[column])`. */
  groupBy(column: string): GroupByResult {
    // A Map avoids collisions with Object.prototype members such as 'constructor'.
    const groups = new Map<string, DataFrameRow[]>();
    for (const row of this.data) {
      const key = String(row[column]);
      const members = groups.get(key);
      if (members) {
        members.push(row);
      } else {
        groups.set(key, [row]);
      }
    }
    const result: GroupByResult = {};
    for (const [key, rows] of groups) {
      result[key] = this.withRows(rows);
    }
    return result;
  }

  /**
   * Applies one aggregation function per column to that column's non-null values.
   * @throws Error when a named column does not exist.
   */
  agg(aggregations: Record<string, AggregationFunction>): DataFrameRow {
    const result: DataFrameRow = {};
    for (const [column, func] of Object.entries(aggregations)) {
      // getColumn throws "Column '<name>' not found" for unknown columns.
      const values = this.getColumn(column).filter(val => val !== null && val !== undefined);
      result[column] = func(values);
    }
    return result;
  }

  // Statistical methods

  /** Values of `column` whose runtime type is `number`. */
  private numericColumn(column: string): number[] {
    return this.getColumn(column).filter(val => typeof val === 'number');
  }

  /** Arithmetic mean of the numeric values; NaN when there are none. */
  mean(column: string): number {
    const values = this.numericColumn(column);
    return values.reduce((sum, val) => sum + val, 0) / values.length;
  }

  /** Sum of the numeric values; 0 when there are none. */
  sum(column: string): number {
    return this.numericColumn(column).reduce((sum, val) => sum + val, 0);
  }

  /** Smallest numeric value; Infinity when there are none. */
  min(column: string): number {
    // reduce instead of Math.min(...values): spreading a large column exceeds
    // the engine's argument limit and throws RangeError.
    return this.numericColumn(column).reduce((lowest, val) => Math.min(lowest, val), Infinity);
  }

  /** Largest numeric value; -Infinity when there are none. */
  max(column: string): number {
    return this.numericColumn(column).reduce((highest, val) => Math.max(highest, val), -Infinity);
  }

  /** Population standard deviation (divides by N) of the numeric values; NaN when there are none. */
  std(column: string): number {
    const values = this.numericColumn(column);
    const mean = values.reduce((sum, val) => sum + val, 0) / values.length;
    const variance = values.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) / values.length;
    return Math.sqrt(variance);
  }

  // Time series specific methods

  /**
   * Aggregates rows into OHLCV bars per hour (`'1H'`) or per day (`'1D'`),
   * bucketing by the local-time calendar fields of `timeColumn`.
   * @throws Error for any other frequency.
   */
  resample(timeColumn: string, frequency: string): DataFrame {
    switch (frequency) {
      case '1H':
        return this.resampleByHour(this.sort(timeColumn), timeColumn);
      case '1D':
        return this.resampleByDay(this.sort(timeColumn), timeColumn);
      default:
        throw new Error(`Unsupported frequency: ${frequency}`);
    }
  }

  private resampleByHour(sorted: DataFrame, timeColumn: string): DataFrame {
    return DataFrame.aggregateBuckets(
      sorted,
      timeColumn,
      date => `${date.getFullYear()}-${date.getMonth()}-${date.getDate()}-${date.getHours()}`
    );
  }

  private resampleByDay(sorted: DataFrame, timeColumn: string): DataFrame {
    return DataFrame.aggregateBuckets(
      sorted,
      timeColumn,
      date => `${date.getFullYear()}-${date.getMonth()}-${date.getDate()}`
    );
  }

  /**
   * Groups the (already time-sorted) rows by `bucketKey` and emits one OHLCV
   * row per bucket, in order of first appearance. Each field falls back from
   * the OHLC column to `close` and then `price`, so both bar data and plain
   * tick data are supported; a field with no usable source is `null`.
   */
  private static aggregateBuckets(
    sorted: DataFrame,
    timeColumn: string,
    bucketKey: (date: Date) => string
  ): DataFrame {
    const buckets = new Map<string, DataFrameRow[]>();
    for (const row of sorted.data) {
      const key = bucketKey(new Date(row[timeColumn]));
      const bucket = buckets.get(key);
      if (bucket) {
        bucket.push(row);
      } else {
        buckets.set(key, [row]);
      }
    }

    const bars: DataFrameRow[] = [];
    for (const rows of buckets.values()) {
      const first = rows[0];
      const last = rows[rows.length - 1];
      bars.push({
        [timeColumn]: first[timeColumn],
        open: DataFrame.firstPresent(first, ['open', 'close', 'price']),
        high: DataFrame.extreme(rows, ['high', 'close', 'price'], Math.max),
        low: DataFrame.extreme(rows, ['low', 'close', 'price'], Math.min),
        close: DataFrame.firstPresent(last, ['close', 'price']),
        volume: DataFrame.numericValues(rows, 'volume').reduce((total, val) => total + val, 0)
      });
    }
    return new DataFrame(bars);
  }

  /** First non-null value among `candidates` in `row`, or null. A value of 0 counts as present. */
  private static firstPresent(row: DataFrameRow, candidates: string[]): any {
    for (const name of candidates) {
      const value = row[name];
      if (value != null) return value;
    }
    return null;
  }

  /** Reduces the first candidate column that has numeric data with `pick`; null when none has. */
  private static extreme(
    rows: DataFrameRow[],
    candidates: string[],
    pick: (a: number, b: number) => number
  ): number | null {
    for (const name of candidates) {
      const values = DataFrame.numericValues(rows, name);
      if (values.length > 0) {
        return values.reduce((best, val) => pick(best, val));
      }
    }
    return null;
  }

  /** Non-NaN numbers stored under `column`; the column does not have to exist. */
  private static numericValues(rows: DataFrameRow[], column: string): number[] {
    const found: number[] = [];
    for (const row of rows) {
      const value = row[column];
      if (typeof value === 'number' && !Number.isNaN(value)) {
        found.push(value);
      }
    }
    return found;
  }

  // Utility methods

  /** Independent copy of this frame (rows are shallow-copied). */
  copy(): DataFrame {
    return this.withRows(this.data);
  }

  /** Rows of this frame followed by the rows of `other`; columns and dtypes are merged. */
  concat(other: DataFrame): DataFrame {
    return new DataFrame([...this.data, ...other.data], {
      columns: Array.from(new Set([...this._columns, ...other._columns])),
      index: this._index,
      dtypes: { ...this._dtypes, ...other._dtypes }
    });
  }

  /** Shallow copies of all rows. */
  toArray(): DataFrameRow[] {
    return this.data.map(row => ({ ...row }));
  }

  /** The rows serialised as a JSON string. */
  toJSON(): string {
    return JSON.stringify(this.data);
  }

  /** The subset of the configured dtypes that applies to `columns`. */
  private filterDtypes(columns: string[]): Record<string, 'number' | 'string' | 'boolean' | 'date'> {
    const filtered: Record<string, 'number' | 'string' | 'boolean' | 'date'> = {};
    for (const col of columns) {
      if (this._dtypes[col]) {
        filtered[col] = this._dtypes[col];
      }
    }
    return filtered;
  }

  // Display method

  /** Tab-separated preview of at most the first 10 rows. */
  toString(): string {
    if (this.empty) {
      return 'Empty DataFrame';
    }
    const maxRows = 10;
    const header = this._columns.join('\t');
    let result = `DataFrame (${this.length} rows x ${this._columns.length} columns)\n`;
    result += header + '\n';
    result += '-'.repeat(header.length) + '\n';
    for (const row of this.data.slice(0, maxRows)) {
      result += this._columns.map(col => String(row[col] ?? 'null')).join('\t') + '\n';
    }
    if (this.length > maxRows) {
      result += `... (${this.length - maxRows} more rows)\n`;
    }
    return result;
  }
}
// Factory functions
/**
 * Convenience factory: builds a DataFrame from row objects without the
 * caller having to use `new`.
 *
 * @param data Rows to load.
 * @param options Optional constructor settings, forwarded unchanged.
 * @returns The newly constructed DataFrame.
 */
export function createDataFrame(data: DataFrameRow[], options?: DataFrameOptions): DataFrame {
  const frame = new DataFrame(data, options);
  return frame;
}
/**
 * Splits one CSV record into trimmed field values.
 *
 * A field that starts with a double quote may contain commas, and `""` inside
 * it stands for a literal quote. Quoted fields cannot span lines.
 */
function splitCsvLine(line: string): string[] {
  const fields: string[] = [];
  let current = '';
  let inQuotes = false;
  for (let i = 0; i < line.length; i++) {
    const ch = line[i];
    if (inQuotes) {
      if (ch !== '"') {
        current += ch;
      } else if (line[i + 1] === '"') {
        // Doubled quote inside a quoted field is an escaped quote.
        current += '"';
        i++;
      } else {
        inQuotes = false;
      }
    } else if (ch === '"' && current.trim() === '') {
      // A quote only opens a quoted field at the start of the field.
      inQuotes = true;
      current = '';
    } else if (ch === ',') {
      fields.push(current.trim());
      current = '';
    } else {
      current += ch;
    }
  }
  fields.push(current.trim());
  return fields;
}

/**
 * Parses comma-separated text into a DataFrame of string cells.
 *
 * The first non-blank line supplies the column names. `\n`, `\r\n` and `\r`
 * line endings are accepted, blank lines are skipped, and empty or missing
 * fields become `null`. Use `options.dtypes` to convert columns; entries in
 * `options` (including `columns`) override the values derived from the header.
 *
 * @param csvData Raw CSV text.
 * @param options Optional DataFrame settings.
 * @returns The parsed frame; an empty frame when `csvData` has no content.
 */
export function readCSV(csvData: string, options?: DataFrameOptions): DataFrame {
  const lines = csvData.split(/\r\n|\n|\r/).filter(line => line.trim() !== '');
  if (lines.length === 0) {
    return new DataFrame([], options);
  }

  const headers = splitCsvLine(lines[0]);
  const data: DataFrameRow[] = lines.slice(1).map(line => {
    const values = splitCsvLine(line);
    const row: DataFrameRow = {};
    headers.forEach((header, position) => {
      const value = values[position];
      row[header] = value === undefined || value === '' ? null : value;
    });
    return row;
  });

  return new DataFrame(data, {
    columns: headers,
    ...options
  });
}

View file

@ -0,0 +1,16 @@
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"outDir": "./dist",
"rootDir": "./src",
"declaration": true
},
"include": [
"src/**/*"
],
"exclude": ["node_modules", "dist", "**/*.test.ts", "**/*.spec.ts"],
"references": [
{ "path": "../logger" },
{ "path": "../types" }
]
}