Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 50 additions & 45 deletions src/methods/dataframe/transform/mutate.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import { Series } from '../../../core/dataframe/Series.js';
import { VectorFactory } from '../../../core/storage/VectorFactory.js';

/**
* Creates new columns or modifies existing columns in a DataFrame by applying functions to each row
Expand All @@ -18,35 +17,50 @@ export function mutate(df, columnFunctions, options = {}) {
throw new Error('Column functions must be specified as an object');
}

// Get row count
// Get row count and columns for processing
const rowCount = df.rowCount;
const columns = df.columns;

// Convert DataFrame to array of row objects for processing
const rows = df.toArray();
// Process column functions and create new column arrays
const newColumns = {};

// If inplace=true, modify DataFrame directly
if (inplace) {
// Apply mutation functions to each column
for (const [colName, colFunc] of Object.entries(columnFunctions)) {
if (typeof colFunc !== 'function') {
throw new Error(`Value for column '${colName}' must be a function`);
}
// For each column function
for (const [colName, colFunc] of Object.entries(columnFunctions)) {
if (typeof colFunc !== 'function') {
throw new Error(`Value for column '${colName}' must be a function`);
}

// Create array for new column values
const colValues = new Array(rowCount);

// Create new column by applying function to each row
const values = [];
// Process each row
for (let i = 0; i < rowCount; i++) {
// Build row object for this index
const row = {};
for (const col of columns) {
row[col] = df.col(col).get(i);
}

// Process each row
for (let i = 0; i < rowCount; i++) {
// Apply the transformation function with correct parameters
const result = colFunc(rows[i], i, df);
// Apply the transformation function with correct parameters
let result = colFunc(row, i, df);

// Convert null/undefined to NaN
values.push(result === null || result === undefined ? NaN : result);
// Convert null/undefined to NaN
if (result === null || result === undefined) {
result = NaN;
}

// Create new Series for this column
const vector = VectorFactory.from(values);
const series = new Series(vector, { name: colName });
colValues[i] = result;
}

// Store the column values
newColumns[colName] = colValues;
}

if (inplace) {
// Update existing columns and add new ones
for (const [colName, colValues] of Object.entries(newColumns)) {
// Create a new Series for this column
const series = new Series(colValues, { name: colName });

// Update or add Series to DataFrame
df._columns[colName] = series;
Expand All @@ -62,36 +76,27 @@ export function mutate(df, columnFunctions, options = {}) {
// Return the original DataFrame
return df;
} else {
// Create a new object to store all columns
// Create a new DataFrame with all columns
const newData = {};

// Copy existing columns
for (const col of df.columns) {
newData[col] = df.col(col).toArray();
}

// Apply mutation functions to each column
for (const [colName, colFunc] of Object.entries(columnFunctions)) {
if (typeof colFunc !== 'function') {
throw new Error(`Value for column '${colName}' must be a function`);
// Copy existing columns that aren't being modified
for (const col of columns) {
if (!(col in newColumns)) {
newData[col] = df.col(col).toArray();
} else {
// Use the new values for modified columns
newData[col] = newColumns[col];
}
}

// Create new column
newData[colName] = [];

// Process each row
for (let i = 0; i < rowCount; i++) {
// Apply the transformation function with correct parameters
const result = colFunc(rows[i], i, df);

// Convert null/undefined to NaN
newData[colName].push(
result === null || result === undefined ? NaN : result,
);
// Add completely new columns
for (const colName of Object.keys(newColumns)) {
if (!columns.includes(colName)) {
newData[colName] = newColumns[colName];
}
}

// Create a new DataFrame with updated data
// Create a new DataFrame with the updated data
return new df.constructor(newData);
}
}
Expand Down
24 changes: 15 additions & 9 deletions test/methods/dataframe/transform/mutate.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ describe('DataFrame.mutate', () => {

// Assert
expect(result.columns).toContain('c');
expect(Array.from(result.col('c'))).toEqual([10, 40, 90]);
expect(result.col('c').toArray()).toEqual([10, 40, 90]);
});

test('modifies an existing column with a function', () => {
Expand All @@ -56,7 +56,7 @@ describe('DataFrame.mutate', () => {
const result = df.mutate(columnFunctions);

// Assert
expect(Array.from(result.col('a'))).toEqual([2, 4, 6]);
expect(result.col('a').toArray()).toEqual([2, 4, 6]);
});

test('adds multiple columns with functions', () => {
Expand All @@ -72,8 +72,8 @@ describe('DataFrame.mutate', () => {
// Assert
expect(result.columns).toContain('c');
expect(result.columns).toContain('d');
expect(Array.from(result.col('c'))).toEqual([10, 40, 90]);
expect(Array.from(result.col('d'))).toEqual([11, 22, 33]);
expect(result.col('c').toArray()).toEqual([10, 40, 90]);
expect(result.col('d').toArray()).toEqual([11, 22, 33]);
});

test('throws error if column functions are not provided', () => {
Expand Down Expand Up @@ -101,7 +101,7 @@ describe('DataFrame.mutate', () => {
const result = df.mutate(columnFunctions);

// Assert
expect(Array.from(result.col('index'))).toEqual([0, 1, 2]);
expect(result.col('index').toArray()).toEqual([0, 1, 2]);
});

test('provides DataFrame as third parameter to column functions', () => {
Expand All @@ -114,7 +114,7 @@ describe('DataFrame.mutate', () => {
const result = df.mutate(columnFunctions);

// Assert
expect(Array.from(result.col('colCount'))).toEqual([5, 5, 5]);
expect(result.col('colCount').toArray()).toEqual([5, 5, 5]);
});

test('converts null and undefined to NaN in column functions', () => {
Expand All @@ -129,10 +129,16 @@ describe('DataFrame.mutate', () => {

// Assert
expect(
Array.from(result.col('nullValues')).every((v) => Number.isNaN(v)),
result
.col('nullValues')
.toArray()
.every((v) => Number.isNaN(v)),
).toBe(true);
expect(
Array.from(result.col('undefinedValues')).every((v) => Number.isNaN(v)),
result
.col('undefinedValues')
.toArray()
.every((v) => Number.isNaN(v)),
).toBe(true);
});

Expand All @@ -148,6 +154,6 @@ describe('DataFrame.mutate', () => {
// Assert
expect(result).toBe(df); // Must return the same DataFrame instance
expect(df.columns).toContain('c');
expect(Array.from(df.col('c'))).toEqual([10, 40, 90]);
expect(df.col('c').toArray()).toEqual([10, 40, 90]);
});
});
Loading