From 74765b5ba25eef3bbad2d039bd0190e562f5a856 Mon Sep 17 00:00:00 2001 From: Alex K Date: Thu, 5 Jun 2025 13:36:41 +0200 Subject: [PATCH] fix: mutate method for DataFrame --- src/methods/dataframe/transform/mutate.js | 95 ++++++++++--------- .../dataframe/transform/mutate.test.js | 24 +++-- 2 files changed, 65 insertions(+), 54 deletions(-) diff --git a/src/methods/dataframe/transform/mutate.js b/src/methods/dataframe/transform/mutate.js index 447eac3..2dbce9f 100644 --- a/src/methods/dataframe/transform/mutate.js +++ b/src/methods/dataframe/transform/mutate.js @@ -1,5 +1,4 @@ import { Series } from '../../../core/dataframe/Series.js'; -import { VectorFactory } from '../../../core/storage/VectorFactory.js'; /** * Creates new columns or modifies existing columns in a DataFrame by applying functions to each row @@ -18,35 +17,50 @@ export function mutate(df, columnFunctions, options = {}) { throw new Error('Column functions must be specified as an object'); } - // Get row count + // Get row count and columns for processing const rowCount = df.rowCount; + const columns = df.columns; - // Convert DataFrame to array of row objects for processing - const rows = df.toArray(); + // Process column functions and create new column arrays + const newColumns = {}; - // If inplace=true, modify DataFrame directly - if (inplace) { - // Apply mutation functions to each column - for (const [colName, colFunc] of Object.entries(columnFunctions)) { - if (typeof colFunc !== 'function') { - throw new Error(`Value for column '${colName}' must be a function`); - } + // For each column function + for (const [colName, colFunc] of Object.entries(columnFunctions)) { + if (typeof colFunc !== 'function') { + throw new Error(`Value for column '${colName}' must be a function`); + } + + // Create array for new column values + const colValues = new Array(rowCount); - // Create new column by applying function to each row - const values = []; + // Process each row + for (let i = 0; i < rowCount; i++) { + // Build row object for this index + const row = {}; + for (const col of columns) { + row[col] = df.col(col).get(i); + } - // Process each row - for (let i = 0; i < rowCount; i++) { - // Apply the transformation function with correct parameters - const result = colFunc(rows[i], i, df); + // Apply the transformation function with correct parameters + let result = colFunc(row, i, df); - // Convert null/undefined to NaN - values.push(result === null || result === undefined ? NaN : result); + // Convert null/undefined to NaN + if (result === null || result === undefined) { + result = NaN; } - // Create new Series for this column - const vector = VectorFactory.from(values); - const series = new Series(vector, { name: colName }); + colValues[i] = result; + } + + // Store the column values + newColumns[colName] = colValues; + } + + if (inplace) { + // Update existing columns and add new ones + for (const [colName, colValues] of Object.entries(newColumns)) { + // Create a new Series for this column + const series = new Series(colValues, { name: colName }); // Update or add Series to DataFrame df._columns[colName] = series; @@ -62,36 +76,27 @@ export function mutate(df, columnFunctions, options = {}) { // Return the original DataFrame return df; } else { - // Create a new object to store all columns + // Create a new DataFrame with all columns const newData = {}; - // Copy existing columns - for (const col of df.columns) { - newData[col] = df.col(col).toArray(); - } - - // Apply mutation functions to each column - for (const [colName, colFunc] of Object.entries(columnFunctions)) { - if (typeof colFunc !== 'function') { - throw new Error(`Value for column '${colName}' must be a function`); + // Copy existing columns that aren't being modified + for (const col of columns) { + if (!(col in newColumns)) { + newData[col] = df.col(col).toArray(); + } else { + // Use the new values for modified columns + newData[col] = newColumns[col]; } + } - // Create new column - newData[colName] = []; - - // Process each row - for (let i = 0; i < rowCount; i++) { - // Apply the transformation function with correct parameters - const result = colFunc(rows[i], i, df); - - // Convert null/undefined to NaN - newData[colName].push( - result === null || result === undefined ? NaN : result, - ); + // Add completely new columns + for (const colName of Object.keys(newColumns)) { + if (!columns.includes(colName)) { + newData[colName] = newColumns[colName]; } } - // Create a new DataFrame with updated data + // Create a new DataFrame with the updated data return new df.constructor(newData); } } diff --git a/test/methods/dataframe/transform/mutate.test.js b/test/methods/dataframe/transform/mutate.test.js index 6154925..f63a4b4 100644 --- a/test/methods/dataframe/transform/mutate.test.js +++ b/test/methods/dataframe/transform/mutate.test.js @@ -43,7 +43,7 @@ describe('DataFrame.mutate', () => { // Assert expect(result.columns).toContain('c'); - expect(Array.from(result.col('c'))).toEqual([10, 40, 90]); + expect(result.col('c').toArray()).toEqual([10, 40, 90]); }); test('modifies an existing column with a function', () => { @@ -56,7 +56,7 @@ describe('DataFrame.mutate', () => { const result = df.mutate(columnFunctions); // Assert - expect(Array.from(result.col('a'))).toEqual([2, 4, 6]); + expect(result.col('a').toArray()).toEqual([2, 4, 6]); }); test('adds multiple columns with functions', () => { @@ -72,8 +72,8 @@ describe('DataFrame.mutate', () => { // Assert expect(result.columns).toContain('c'); expect(result.columns).toContain('d'); - expect(Array.from(result.col('c'))).toEqual([10, 40, 90]); - expect(Array.from(result.col('d'))).toEqual([11, 22, 33]); + expect(result.col('c').toArray()).toEqual([10, 40, 90]); + expect(result.col('d').toArray()).toEqual([11, 22, 33]); }); test('throws error if column functions are not provided', () => { @@ -101,7 +101,7 @@ describe('DataFrame.mutate', () => { const result = df.mutate(columnFunctions); // Assert - expect(Array.from(result.col('index'))).toEqual([0, 1, 2]); + expect(result.col('index').toArray()).toEqual([0, 1, 2]); }); test('provides DataFrame as third parameter to column functions', () => { @@ -114,7 +114,7 @@ describe('DataFrame.mutate', () => { const result = df.mutate(columnFunctions); // Assert - expect(Array.from(result.col('colCount'))).toEqual([5, 5, 5]); + expect(result.col('colCount').toArray()).toEqual([5, 5, 5]); }); test('converts null and undefined to NaN in column functions', () => { @@ -129,10 +129,16 @@ describe('DataFrame.mutate', () => { // Assert expect( - Array.from(result.col('nullValues')).every((v) => Number.isNaN(v)), + result + .col('nullValues') + .toArray() + .every((v) => Number.isNaN(v)), ).toBe(true); expect( - Array.from(result.col('undefinedValues')).every((v) => Number.isNaN(v)), + result + .col('undefinedValues') + .toArray() + .every((v) => Number.isNaN(v)), ).toBe(true); }); @@ -148,6 +154,6 @@ describe('DataFrame.mutate', () => { // Assert expect(result).toBe(df); // Должен вернуть тот же экземпляр DataFrame expect(df.columns).toContain('c'); - expect(Array.from(df.col('c'))).toEqual([10, 40, 90]); + expect(df.col('c').toArray()).toEqual([10, 40, 90]); }); });