From 0e3cce13dd2ccd29a7ed857c4cb64cc48e5232ed Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Sat, 28 Dec 2024 14:05:36 +0100 Subject: [PATCH 1/2] [MINOR] Increase Memory Estimate for Frames This commit increases the default estimate of frame size. Previously, frames were estimated similarly to Matrices. The wrong estimate leads to problems on frames with more than Integer.MAX_VALUE rows. To improve it, this commit defaults to assuming 8-character strings in all cells of an unread frame. Since there is no way of knowing if the input Frame contains longer strings, it is still a subpar estimate. However, it is an improvement over estimating everything as a dense double Matrix. (The change happened because I encountered very incorrect estimates in BEWARE) --- src/main/java/org/apache/sysds/hops/DataOp.java | 9 +++++++-- .../java/org/apache/sysds/hops/OptimizerUtils.java | 10 ++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/sysds/hops/DataOp.java b/src/main/java/org/apache/sysds/hops/DataOp.java index 82e5ecbbad5..7be61f4129b 100644 --- a/src/main/java/org/apache/sysds/hops/DataOp.java +++ b/src/main/java/org/apache/sysds/hops/DataOp.java @@ -359,8 +359,8 @@ public boolean allowsAllExecTypes() protected double computeOutputMemEstimate( long dim1, long dim2, long nnz ) { double ret = 0; - - if ( getDataType() == DataType.SCALAR ) + final DataType dt = getDataType(); + if ( dt == DataType.SCALAR ) { switch( getValueType() ) { @@ -379,6 +379,11 @@ protected double computeOutputMemEstimate( long dim1, long dim2, long nnz ) ret = 0; } } + else if(dt == DataType.FRAME) { + if(_op == OpOpData.PERSISTENTREAD || _op == OpOpData.TRANSIENTREAD) { + ret = OptimizerUtils.estimateSizeExactFrame(dim1, dim2); + } + } else //MATRIX / FRAME { if( _op == OpOpData.PERSISTENTREAD diff --git a/src/main/java/org/apache/sysds/hops/OptimizerUtils.java b/src/main/java/org/apache/sysds/hops/OptimizerUtils.java index 6338ff7a709..3144b178dd5 100644 --- 
a/src/main/java/org/apache/sysds/hops/OptimizerUtils.java +++ b/src/main/java/org/apache/sysds/hops/OptimizerUtils.java @@ -64,6 +64,7 @@ import org.apache.sysds.runtime.util.IndexRange; import org.apache.sysds.runtime.util.UtilFunctions; import org.apache.sysds.utils.stats.InfrastructureAnalyzer; +import org.apache.sysds.utils.MemoryEstimates; public class OptimizerUtils { @@ -788,6 +789,15 @@ public static long estimateSizeExactSparsity(long nrows, long ncols, long nnz) double sp = getSparsity(nrows, ncols, nnz); return estimateSizeExactSparsity(nrows, ncols, sp); } + + + public static long estimateSizeExactFrame(long nRows, long nCols){ + if(nRows > Integer.MAX_VALUE) + return Long.MAX_VALUE; + + // assuming String arrays and on average 8 characters per value. + return (long)MemoryEstimates.stringArrayCost((int)nRows, 8) * nCols; + } /** * Estimates the footprint (in bytes) for an in-memory representation of a From e095cf171dab9ee82b68740d40e12a5f1289cdbf Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Sat, 28 Dec 2024 15:06:38 +0100 Subject: [PATCH 2/2] tests --- .../org/apache/sysds/hops/OptimizerUtils.java | 3 ++ .../component/misc/MemoryEstimateTest.java | 1 - .../component/misc/OptimizerUtilsTest.java | 43 +++++++++++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 src/test/java/org/apache/sysds/test/component/misc/OptimizerUtilsTest.java diff --git a/src/main/java/org/apache/sysds/hops/OptimizerUtils.java b/src/main/java/org/apache/sysds/hops/OptimizerUtils.java index 3144b178dd5..a3161c57230 100644 --- a/src/main/java/org/apache/sysds/hops/OptimizerUtils.java +++ b/src/main/java/org/apache/sysds/hops/OptimizerUtils.java @@ -792,6 +792,9 @@ public static long estimateSizeExactSparsity(long nrows, long ncols, long nnz) public static long estimateSizeExactFrame(long nRows, long nCols){ + // Currently we do not support frames larger than INT. + // Therefore, we estimate their size to be extremely large. 
+ // The large size forces Spark operations. if(nRows > Integer.MAX_VALUE) return Long.MAX_VALUE; diff --git a/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java b/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java index 8c8e31535bf..d68c30f8367 100644 --- a/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java +++ b/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java @@ -87,7 +87,6 @@ public void test() { assertEquals(MemoryEstimates.doubleArrayCost(length), measure(arrayDouble), 0.2); break; default: - System.out.println(arrayToMeasure.getClass().getSimpleName()); throw new NotImplementedException(arrayToMeasure + " not implemented"); } } diff --git a/src/test/java/org/apache/sysds/test/component/misc/OptimizerUtilsTest.java b/src/test/java/org/apache/sysds/test/component/misc/OptimizerUtilsTest.java new file mode 100644 index 00000000000..16e9b2c27b1 --- /dev/null +++ b/src/test/java/org/apache/sysds/test/component/misc/OptimizerUtilsTest.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.test.component.misc; + +import static org.junit.Assert.assertTrue; + +import org.apache.sysds.hops.OptimizerUtils; +import org.junit.Test; + +public class OptimizerUtilsTest { + + @Test + public void estimateFrameSize() { + Long size = OptimizerUtils.estimateSizeExactFrame(10, 10); + assertTrue(size > 10 * 10); + } + + @Test + public void estimateFrameSizeMoreRowsThanInt() { + // Currently we do not support frames with more than Integer.MAX_VALUE rows. Therefore, we estimate their size to be extremely large. + // The large size forces Spark operations. + Long size = OptimizerUtils.estimateSizeExactFrame(Integer.MAX_VALUE + 1L, 10); + + assertTrue(size == Long.MAX_VALUE); + } +}