cleanup
This commit is contained in:
parent
ed0df3184a
commit
629ba2b8d4
3 changed files with 0 additions and 541 deletions
|
|
@ -1,33 +0,0 @@
|
|||
{
  "name": "@stock-bot/data-frame",
  "version": "1.0.0",
  "description": "DataFrame library for time series data manipulation",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
  "type": "module",
  "scripts": {
    "build": "tsc",
    "test": "bun test",
    "clean": "rimraf dist"
  },
  "dependencies": {
    "@stock-bot/logger": "*",
    "@stock-bot/utils": "*"
  },
  "devDependencies": {
    "@types/node": "^20.11.0",
    "typescript": "^5.3.0",
    "bun-types": "^1.2.15"
  },
  "exports": {
    ".": {
      "import": "./dist/index.js",
      "require": "./dist/index.js",
      "types": "./dist/index.d.ts"
    }
  },
  "files": [
    "dist",
    "README.md"
  ]
}
|
||||
|
|
@ -1,501 +0,0 @@
|
|||
import { getLogger } from '@stock-bot/logger';
|
||||
|
||||
/**
 * A single record of a DataFrame: a plain object keyed by column name.
 *
 * Cell values are untyped (`any`); a frame may coerce them on construction
 * when a dtype is declared for the column.
 */
export interface DataFrameRow {
  [key: string]: any;
}
|
||||
|
||||
/**
 * Optional settings accepted when constructing a DataFrame.
 */
export interface DataFrameOptions {
  /** Name of the index; the constructor falls back to `'index'` when omitted. */
  index?: string;
  /** Explicit column list; when omitted the columns are inferred from the row keys. */
  columns?: string[];
  /** Per-column target type; non-null cells of listed columns are converted on construction. */
  dtypes?: Record<string, 'number' | 'string' | 'boolean' | 'date'>;
}
|
||||
|
||||
/**
 * Result of `DataFrame.groupBy`: one DataFrame per distinct group key,
 * keyed by the stringified value of the grouping column.
 */
export interface GroupByResult {
  [key: string]: DataFrame;
}
|
||||
|
||||
/**
 * Reducer used by `DataFrame.agg`: receives the values of one column
 * (with `null`/`undefined` entries already removed) and returns the aggregate.
 */
export interface AggregationFunction {
  (values: any[]): any;
}
|
||||
|
||||
/** Value types a column can be coerced to when a frame is built. */
type ColumnDtype = 'number' | 'string' | 'boolean' | 'date';

/**
 * Row-oriented, immutable-style table for time series data.
 *
 * Every transforming method returns a new DataFrame; the receiver is never
 * modified. Rows are shared between frames unless they have to be changed
 * (missing column filled with `null`, dtype conversion), in which case the
 * frame works on its own copy so the caller's objects are left untouched.
 */
export class DataFrame {
  private data: DataFrameRow[];
  private _columns: string[];
  private _index: string;
  private _dtypes: Record<string, ColumnDtype>;
  private readonly logger = getLogger('dataframe');

  /**
   * @param data    Rows of the frame. The array is copied; row objects are
   *                cloned only if they need normalisation.
   * @param options Index name, explicit column list and per-column dtypes.
   */
  constructor(data: DataFrameRow[] = [], options: DataFrameOptions = {}) {
    this.data = [...data];
    this._index = options.index || 'index';
    // Copy option containers so later mutation by the caller cannot leak in.
    this._columns = options.columns ? [...options.columns] : this.inferColumns();
    this._dtypes = { ...(options.dtypes ?? {}) };

    this.validateAndCleanData();
  }

  /** Collects the union of all row keys, sorted alphabetically. */
  private inferColumns(): string[] {
    const seen = new Set<string>();
    for (const row of this.data) {
      for (const key of Object.keys(row)) {
        seen.add(key);
      }
    }
    return [...seen].sort();
  }

  /**
   * Makes every row carry every column (missing ones become `null`) and
   * applies the declared dtype conversions to non-null cells.
   *
   * Rows are cloned lazily: a row object supplied by the caller is never
   * written to; the first required change swaps in a private copy.
   */
  private validateAndCleanData(): void {
    const dtypeEntries = Object.entries(this._dtypes);

    for (const [position, original] of this.data.entries()) {
      let patched: DataFrameRow | undefined;
      const writable = (): DataFrameRow => {
        if (patched === undefined) {
          patched = { ...original };
          this.data[position] = patched;
        }
        return patched;
      };

      for (const col of this._columns) {
        if (!(col in original)) {
          writable()[col] = null;
        }
      }

      for (const [col, dtype] of dtypeEntries) {
        const raw = original[col];
        // null/undefined mean "missing" and are never coerced
        // (avoids 'undefined' strings, NaN and Invalid Date cells).
        if (raw === null || raw === undefined) {
          continue;
        }
        const converted = this.convertValue(raw, dtype);
        if (!Object.is(converted, raw)) {
          writable()[col] = converted;
        }
      }
    }
  }

  /** Coerces one cell to the given dtype; unknown dtypes pass the value through. */
  private convertValue(value: any, dtype: string): any {
    switch (dtype) {
      case 'number':
        return typeof value === 'number' ? value : parseFloat(value);
      case 'string':
        return String(value);
      case 'boolean':
        if (typeof value === 'string') {
          // Textual input (e.g. from CSV): Boolean('false') would be true.
          const text = value.trim().toLowerCase();
          return !(text === '' || text === 'false' || text === '0');
        }
        return Boolean(value);
      case 'date':
        return value instanceof Date ? value : new Date(value);
      default:
        return value;
    }
  }

  /** Builds a frame that shares this frame's index name (and, by default, columns and dtypes). */
  private derive(
    rows: DataFrameRow[],
    columns: string[] = this._columns,
    dtypes: Record<string, ColumnDtype> = this._dtypes
  ): DataFrame {
    return new DataFrame(rows, { columns, index: this._index, dtypes });
  }

  /** Groups rows by a string key, preserving first-seen key order. */
  private static bucketRows(
    rows: DataFrameRow[],
    keyOf: (row: DataFrameRow) => string
  ): Map<string, DataFrameRow[]> {
    // A Map (not a plain object) so keys like 'constructor' cannot collide
    // with inherited Object.prototype members.
    const buckets = new Map<string, DataFrameRow[]>();
    for (const row of rows) {
      const key = keyOf(row);
      const bucket = buckets.get(key);
      if (bucket) {
        bucket.push(row);
      } else {
        buckets.set(key, [row]);
      }
    }
    return buckets;
  }

  // Basic properties

  /** Copy of the column names. */
  get columns(): string[] {
    return [...this._columns];
  }

  /** Name of the index. */
  get index(): string {
    return this._index;
  }

  /** Number of rows. */
  get length(): number {
    return this.data.length;
  }

  /** `[rows, columns]`. */
  get shape(): [number, number] {
    return [this.data.length, this._columns.length];
  }

  /** True when the frame has no rows. */
  get empty(): boolean {
    return this.data.length === 0;
  }

  // Data access methods

  /** First `n` rows (`Array.prototype.slice(0, n)` semantics for negative `n`). */
  head(n: number = 5): DataFrame {
    return this.derive(this.data.slice(0, n));
  }

  /** Last `n` rows; `tail(0)` is an empty frame. */
  tail(n: number = 5): DataFrame {
    // slice(-0) is slice(0) and would return every row.
    return this.derive(n === 0 ? [] : this.data.slice(-n));
  }

  /** Rows in the half-open positional range `[start, end)`; `end` defaults to the end of the frame. */
  iloc(start: number, end?: number): DataFrame {
    return this.derive(this.data.slice(start, end));
  }

  /**
   * Cell at row position `index`, column `column`.
   * @throws Error when `index` is not an integer inside `[0, length)`.
   */
  at(index: number, column: string): any {
    const row = Number.isInteger(index) ? this.data[index] : undefined;
    if (row === undefined) {
      throw new Error(`Index ${index} out of bounds`);
    }
    return row[column];
  }

  // Column operations

  /** Frame restricted to the requested columns; unknown names are silently ignored. */
  select(columns: string[]): DataFrame {
    const validColumns = columns.filter(col => this._columns.includes(col));
    const newData = this.data.map(row => {
      const newRow: DataFrameRow = {};
      for (const col of validColumns) {
        newRow[col] = row[col];
      }
      return newRow;
    });

    return this.derive(newData, validColumns, this.filterDtypes(validColumns));
  }

  /** Frame without the listed columns. */
  drop(columns: string[]): DataFrame {
    return this.select(this._columns.filter(col => !columns.includes(col)));
  }

  /**
   * All values of one column, in row order.
   * @throws Error when the column does not exist.
   */
  getColumn(column: string): any[] {
    if (!this._columns.includes(column)) {
      throw new Error(`Column '${column}' not found`);
    }
    return this.data.map(row => row[column]);
  }

  /**
   * Frame with `column` added or replaced by `values`.
   * @throws Error when `values` does not have one entry per row.
   */
  setColumn(column: string, values: any[]): DataFrame {
    if (values.length !== this.data.length) {
      throw new Error('Values length must match DataFrame length');
    }

    const newData = this.data.map((row, position) => ({
      ...row,
      [column]: values[position],
    }));
    const newColumns = this._columns.includes(column)
      ? this._columns
      : [...this._columns, column];

    return this.derive(newData, newColumns);
  }

  // Filtering

  /** Rows for which `predicate` returns true. */
  filter(predicate: (row: DataFrameRow, index: number) => boolean): DataFrame {
    return this.derive(this.data.filter(predicate));
  }

  /** Rows whose `column` cell satisfies `cell <operator> value`; `==`/`!=` use strict equality. */
  where(column: string, operator: '>' | '<' | '>=' | '<=' | '==' | '!=', value: any): DataFrame {
    return this.filter(row => {
      const cellValue = row[column];
      switch (operator) {
        case '>':
          return cellValue > value;
        case '<':
          return cellValue < value;
        case '>=':
          return cellValue >= value;
        case '<=':
          return cellValue <= value;
        case '==':
          return cellValue === value;
        case '!=':
          return cellValue !== value;
        default:
          return false;
      }
    });
  }

  // Sorting

  /** Frame sorted by `column`; rows whose cells cannot be ordered keep their relative order. */
  sort(column: string, ascending: boolean = true): DataFrame {
    const direction = ascending ? 1 : -1;
    const sortedData = [...this.data].sort((a, b) => {
      const aVal = a[column];
      const bVal = b[column];
      // Test both directions so equal-by-value cells (e.g. two Date objects
      // for the same instant) and incomparable cells compare as 0, keeping
      // the comparator consistent.
      if (aVal > bVal) {
        return direction;
      }
      if (aVal < bVal) {
        return -direction;
      }
      return 0;
    });

    return this.derive(sortedData);
  }

  // Aggregation

  /** Splits the frame into one frame per distinct `String(row[column])`. */
  groupBy(column: string): GroupByResult {
    const buckets = DataFrame.bucketRows(this.data, row => String(row[column]));

    const result: GroupByResult = {};
    for (const [key, rows] of buckets) {
      result[key] = this.derive(rows);
    }
    return result;
  }

  /**
   * Applies one reducer per column to that column's non-null values.
   * @throws Error when a named column does not exist.
   */
  agg(aggregations: Record<string, AggregationFunction>): DataFrameRow {
    const result: DataFrameRow = {};
    for (const [column, func] of Object.entries(aggregations)) {
      // getColumn raises the "Column '…' not found" error for unknown names.
      const values = this.getColumn(column).filter(val => val !== null && val !== undefined);
      result[column] = func(values);
    }
    return result;
  }

  // Statistical methods

  /** Cells of `column` that are of type number (nulls, strings, dates are skipped). */
  private numericValues(column: string): number[] {
    return this.getColumn(column).filter((val): val is number => typeof val === 'number');
  }

  /** Arithmetic mean of the numeric cells; `NaN` when there are none. */
  mean(column: string): number {
    const values = this.numericValues(column);
    return values.reduce((sum, val) => sum + val, 0) / values.length;
  }

  /** Sum of the numeric cells; `0` when there are none. */
  sum(column: string): number {
    return this.numericValues(column).reduce((sum, val) => sum + val, 0);
  }

  /** Smallest numeric cell; `Infinity` when there are none. */
  min(column: string): number {
    // reduce instead of Math.min(...values): spreading a very large column
    // into call arguments can exceed the engine's argument/stack limit.
    return this.numericValues(column).reduce((lowest, val) => Math.min(lowest, val), Infinity);
  }

  /** Largest numeric cell; `-Infinity` when there are none. */
  max(column: string): number {
    return this.numericValues(column).reduce((highest, val) => Math.max(highest, val), -Infinity);
  }

  /** Population standard deviation (divides by N) of the numeric cells; `NaN` when there are none. */
  std(column: string): number {
    const values = this.numericValues(column);
    const mean = values.reduce((sum, val) => sum + val, 0) / values.length;
    const variance =
      values.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) / values.length;
    return Math.sqrt(variance);
  }

  // Time series specific methods

  /**
   * Aggregates rows into OHLCV bars per hour (`'1H'`) or per day (`'1D'`),
   * bucketing on the local-time components of `timeColumn`.
   * @throws Error for any other frequency.
   */
  resample(timeColumn: string, frequency: string): DataFrame {
    const sorted = this.sort(timeColumn);

    switch (frequency) {
      case '1H':
        return this.resampleByHour(sorted, timeColumn);
      case '1D':
        return this.resampleByDay(sorted, timeColumn);
      default:
        throw new Error(`Unsupported frequency: ${frequency}`);
    }
  }

  /** Bucket key for a calendar day in local time (month is zero-based; only used as a key). */
  private static dayKey(date: Date): string {
    return `${date.getFullYear()}-${date.getMonth()}-${date.getDate()}`;
  }

  private resampleByHour(sorted: DataFrame, timeColumn: string): DataFrame {
    return this.aggregateBars(
      sorted,
      timeColumn,
      date => `${DataFrame.dayKey(date)}-${date.getHours()}`
    );
  }

  private resampleByDay(sorted: DataFrame, timeColumn: string): DataFrame {
    return this.aggregateBars(sorted, timeColumn, DataFrame.dayKey);
  }

  /** Numeric cells of the first candidate column that exists and has any; `[]` otherwise. */
  private firstNumericColumn(candidates: string[]): number[] {
    for (const name of candidates) {
      if (!this._columns.includes(name)) {
        continue;
      }
      const values = this.numericValues(name);
      if (values.length > 0) {
        return values;
      }
    }
    return [];
  }

  /**
   * Shared implementation of the resamplers: buckets the (already sorted)
   * rows by `bucketKey` and emits one OHLCV row per bucket.
   *
   * Source columns are optional: open falls back open → close → price,
   * high/low fall back to close then price, volume defaults to 0. Missing
   * columns never throw; high/low are `null` when no numeric data exists.
   */
  private aggregateBars(
    sorted: DataFrame,
    timeColumn: string,
    bucketKey: (date: Date) => string
  ): DataFrame {
    const buckets = DataFrame.bucketRows(sorted.data, row =>
      bucketKey(new Date(row[timeColumn]))
    );

    const bars: DataFrameRow[] = [];
    for (const rows of buckets.values()) {
      const first = rows[0];
      const last = rows[rows.length - 1];
      if (first === undefined || last === undefined) {
        continue;
      }

      const bucket = this.derive(rows);
      const highs = bucket.firstNumericColumn(['high', 'close', 'price']);
      const lows = bucket.firstNumericColumn(['low', 'close', 'price']);

      bars.push({
        [timeColumn]: first[timeColumn],
        // ?? rather than ||: a legitimate 0 must not trigger the fallback.
        open: first.open ?? first.close ?? first.price,
        high: highs.length > 0 ? highs.reduce((a, b) => Math.max(a, b)) : null,
        low: lows.length > 0 ? lows.reduce((a, b) => Math.min(a, b)) : null,
        close: last.close ?? last.price,
        volume: this._columns.includes('volume') ? bucket.sum('volume') || 0 : 0,
      });
    }

    return new DataFrame(bars);
  }

  // Utility methods

  /** Frame with freshly copied row objects (one level deep) and dtype map. */
  copy(): DataFrame {
    return this.derive(
      this.data.map(row => ({ ...row })),
      this._columns,
      { ...this._dtypes }
    );
  }

  /** Rows of `other` appended to this frame; columns and dtypes are merged (`other` wins on dtype conflicts). */
  concat(other: DataFrame): DataFrame {
    const combinedData = [...this.data, ...other.data];
    const combinedColumns = Array.from(new Set([...this._columns, ...other._columns]));

    return this.derive(combinedData, combinedColumns, { ...this._dtypes, ...other._dtypes });
  }

  /** Shallow copies of all rows. */
  toArray(): DataFrameRow[] {
    return this.data.map(row => ({ ...row }));
  }

  /**
   * The rows serialised as a JSON string.
   * NOTE(review): because this method is named `toJSON`, `JSON.stringify(frame)`
   * will call it and then stringify the returned string a second time.
   */
  toJSON(): string {
    return JSON.stringify(this.data);
  }

  /** Subset of the dtype map covering only `columns`. */
  private filterDtypes(columns: string[]): Record<string, ColumnDtype> {
    const filtered: Record<string, ColumnDtype> = {};
    for (const col of columns) {
      const dtype = this._dtypes[col];
      if (dtype) {
        filtered[col] = dtype;
      }
    }
    return filtered;
  }

  // Display method

  /** Tab-separated preview of at most the first 10 rows. */
  toString(): string {
    if (this.empty) {
      return 'Empty DataFrame';
    }

    const maxRows = 10;
    const header = this._columns.join('\t');

    let result = `DataFrame (${this.length} rows x ${this._columns.length} columns)\n`;
    result += header + '\n';
    result += '-'.repeat(header.length) + '\n';

    for (const row of this.data.slice(0, maxRows)) {
      result += this._columns.map(col => String(row[col] ?? 'null')).join('\t') + '\n';
    }

    if (this.length > maxRows) {
      result += `... (${this.length - maxRows} more rows)\n`;
    }

    return result;
  }
}
|
||||
|
||||
// Factory functions
|
||||
/**
 * Factory wrapper around the DataFrame constructor.
 *
 * @param data    Rows of the new frame.
 * @param options Optional index name, column list and dtypes.
 * @returns A new DataFrame built from `data`.
 */
export function createDataFrame(data: DataFrameRow[], options?: DataFrameOptions): DataFrame {
  return new DataFrame(data, options);
}
|
||||
|
||||
/**
 * Parses simple comma-separated text into a DataFrame.
 *
 * The first non-blank line supplies the column names; every following
 * non-blank line becomes a row. Fields are trimmed, and empty or missing
 * fields become `null`. All values are kept as strings unless `options.dtypes`
 * requests a conversion. Quoted fields and embedded commas are NOT supported.
 *
 * @param csvData Raw CSV text (LF or CRLF line endings).
 * @param options DataFrame options; `options.columns`, when given, overrides the header row.
 * @returns The parsed frame, or an empty frame when the input has no content.
 */
export function readCSV(csvData: string, options?: DataFrameOptions): DataFrame {
  // Drop blank lines up front: they would otherwise turn into all-null rows,
  // and blank input would yield a bogus single '' column.
  const lines = csvData.split(/\r?\n/).filter(line => line.trim() !== '');

  const headerLine = lines[0];
  if (headerLine === undefined) {
    return new DataFrame([], options);
  }

  const splitFields = (line: string): string[] => line.split(',').map(field => field.trim());

  const headers = splitFields(headerLine);
  const data = lines.slice(1).map(line => {
    const fields = splitFields(line);
    const row: DataFrameRow = {};
    headers.forEach((header, position) => {
      const field = fields[position];
      row[header] = field === undefined || field === '' ? null : field;
    });
    return row;
  });

  return new DataFrame(data, {
    columns: headers,
    ...options,
  });
}
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
{
  "extends": "../../tsconfig.lib.json",
  "references": [
    { "path": "../logger" },
    { "path": "../utils" }
  ]
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue