Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 111 additions & 37 deletions src/methods/dataframe/transform/apply.js
Original file line number Diff line number Diff line change
@@ -1,48 +1,122 @@
import { Series } from '../../../core/dataframe/Series.js';
import { VectorFactory } from '../../../core/storage/VectorFactory.js';

/**
 * Apply a function to every value of the selected columns of a DataFrame.
 *
 * The callback is invoked as `func(value, index, columnName)`. A callback
 * result of `null` or `undefined` is stored as `NaN`.
 *
 * All values are computed before anything is written, so when `inplace` is
 * true and the callback throws, `df` is left exactly as it was.
 *
 * @param {DataFrame} df - DataFrame to transform
 * @param {Function} func - Function to apply to each value
 * @param {Object} [options] - Options for apply
 * @param {boolean} [options.inplace=false] - Whether to modify the DataFrame in place
 * @param {string|string[]} [options.columns] - Column name or list of column names to
 *   transform (default: all columns)
 * @returns {DataFrame} - New DataFrame with transformed values, or `df` itself if inplace=true
 * @throws {Error} If `df` is missing, `func` is not a function, or a requested column
 *   does not exist
 */
export function apply(df, func, options = {}) {
  if (!df || typeof df !== 'object') {
    throw new Error('DataFrame instance is required');
  }
  if (typeof func !== 'function') {
    throw new Error('Function to apply must be provided');
  }

  const { inplace = false, columns = df.columns } = options;

  // A single column may be given as a bare string
  const targetColumns = Array.isArray(columns) ? columns : [columns];

  // Validate every requested column before doing any work
  const knownColumns = new Set(df.columns);
  for (const col of targetColumns) {
    if (!knownColumns.has(col)) {
      throw new Error(`Column '${col}' not found`);
    }
  }

  // Compute all transformed columns first. A column listed more than once is
  // transformed once per listing, each pass starting from the previous output.
  const transformed = new Map();
  for (const col of targetColumns) {
    const source = transformed.has(col)
      ? transformed.get(col)
      : df.col(col).toArray();
    transformed.set(
      col,
      source.map((value, index) => {
        const output = func(value, index, col);
        // Missing results (null/undefined) are normalised to NaN
        return output === null || output === undefined ? NaN : output;
      }),
    );
  }

  if (inplace) {
    // Build every replacement Series before touching df, then swap them in
    const replacements = [];
    for (const [col, values] of transformed) {
      const vector = VectorFactory.from(values);
      replacements.push([col, new Series(vector, { name: col })]);
    }
    for (const [col, series] of replacements) {
      df._columns[col] = series;
    }
    return df;
  }

  // Build the data for a new DataFrame, keeping the original column order
  const data = {};
  for (const col of df.columns) {
    data[col] = transformed.has(col)
      ? transformed.get(col)
      : df.col(col).toArray();
  }
  return new df.constructor(data);
}

/**
 * Transform every column of a DataFrame with the same function.
 *
 * Convenience wrapper around `apply`: the call is forwarded with `columns`
 * set to the DataFrame's full column list, replacing any `columns` entry
 * that may be present in `options`.
 *
 * @param {DataFrame} df - DataFrame to transform
 * @param {Function} func - Function to apply to each value
 * @param {Object} options - Options for applyAll
 * @param {boolean} [options.inplace=false] - Whether to modify the DataFrame in place
 * @returns {DataFrame} - Whatever `apply` returns for the same arguments
 */
export function applyAll(df, func, options = {}) {
  const everyColumn = df.columns;
  const forwardedOptions = { ...options, columns: everyColumn };

  return apply(df, func, forwardedOptions);
}

/**
 * Register apply methods on the DataFrame prototype.
 *
 * Installs two methods:
 * - `df.apply(columns, func, options)` transforms the named column(s);
 *   `df.apply(func, options)` is accepted as a shorthand for all columns.
 * - `df.applyAll(func, options)` transforms every column.
 *
 * Both delegate to the module-level `apply` / `applyAll` and return their
 * result.
 *
 * @param {Class} DataFrame - DataFrame class to extend
 * @throws {Error} If no DataFrame class is supplied
 */
export function register(DataFrame) {
  if (!DataFrame) {
    throw new Error('DataFrame instance is required');
  }

  DataFrame.prototype.apply = function (columns, func, options = {}) {
    // If first argument is a function, assume it's for all columns
    if (typeof columns === 'function') {
      // In the shorthand form apply(func, options) the options object arrives
      // in the second positional slot; fall back to the third slot so that
      // apply(func, undefined, options) keeps working.
      const shorthandOptions =
        func !== null && typeof func === 'object' ? func : options;
      return applyAll(this, columns, shorthandOptions);
    }

    return apply(this, func, { ...options, columns });
  };

  DataFrame.prototype.applyAll = function (func, options = {}) {
    return applyAll(this, func, options);
  };
}
99 changes: 70 additions & 29 deletions src/methods/dataframe/transform/assign.js
Original file line number Diff line number Diff line change
@@ -1,53 +1,94 @@
import { Series } from '../../../core/dataframe/Series.js';
import { VectorFactory } from '../../../core/storage/VectorFactory.js';

/**
 * Adds or updates columns in a DataFrame.
 *
 * Each value in `columns` may be an array, an object exposing `toArray()`
 * (such as a Series), or a constant. A constant is expanded into an array of
 * `df.rowCount` copies.
 *
 * Updated columns keep their existing position; new columns are appended in
 * the order they appear in `columns`.
 *
 * @param {DataFrame} df - DataFrame instance
 * @param {Object} columns - Object with column names as keys and arrays, Series or constants as values
 * @param {Object} [options] - Options for assign
 * @param {boolean} [options.inplace=false] - Whether to modify the DataFrame in place
 * @returns {DataFrame} - New DataFrame with added/updated columns or the original DataFrame if inplace=true
 * @throws {Error} If `df` is not an object or `columns` is not a plain (non-array) object
 */
export function assign(df, columns, options = {}) {
  // Validate arguments
  if (!df || typeof df !== 'object') {
    throw new Error('DataFrame instance is required');
  }
  if (!columns || typeof columns !== 'object' || Array.isArray(columns)) {
    throw new Error('Columns must be an object');
  }

  const { inplace = false } = options;

  // Normalise every incoming value to an array of column values. A Map is
  // used so that column names such as "toString" or "constructor" are never
  // confused with inherited object properties.
  const incoming = new Map();
  for (const [key, value] of Object.entries(columns)) {
    if (value && typeof value.toArray === 'function') {
      incoming.set(key, value.toArray());
    } else if (Array.isArray(value)) {
      incoming.set(key, value);
    } else {
      // For constant values, create an array of that value
      incoming.set(key, Array(df.rowCount).fill(value));
    }
  }

  if (inplace) {
    // Build every Series before touching df so a construction failure
    // cannot leave it partially updated.
    const replacements = [];
    for (const [key, values] of incoming) {
      const vector = VectorFactory.from(values);
      replacements.push([key, new Series(vector, { name: key })]);
    }

    const addedKeys = replacements
      .map(([key]) => key)
      .filter((key) => !df._order.includes(key));

    for (const [key, series] of replacements) {
      df._columns[key] = series;
    }

    if (addedKeys.length > 0) {
      // _order is treated as a frozen array, so it is replaced (once) with a
      // new frozen array instead of being mutated.
      Object.defineProperty(df, '_order', {
        value: Object.freeze([...df._order, ...addedKeys]),
      });
    }

    return df;
  }

  // Existing columns first, in their original order, substituting updates
  const newData = {};
  for (const col of df.columns) {
    newData[col] = incoming.has(col) ? incoming.get(col) : df.col(col).toArray();
  }

  // Then the new columns; re-assigning an existing key keeps its position
  for (const [key, values] of incoming) {
    newData[key] = values;
  }

  return new df.constructor(newData);
}

/**
 * Registers the assign method on the DataFrame prototype.
 *
 * The installed `df.assign(columns, options)` delegates to the module-level
 * `assign` with the instance as its first argument and returns its result.
 *
 * @param {Class} DataFrame - DataFrame class to extend
 * @throws {Error} If no DataFrame class is supplied
 */
export function register(DataFrame) {
  if (!DataFrame) {
    throw new Error('DataFrame instance is required');
  }

  DataFrame.prototype.assign = function (columns, options = {}) {
    return assign(this, columns, options);
  };
}
Loading
Loading