-
Notifications
You must be signed in to change notification settings - Fork 3
optimize count items on get buckets query #369
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,6 @@ | ||
| { | ||
| "name": "s3utils", | ||
| "version": "1.17.2", | ||
| "version": "1.17.3", | ||
| "engines": { | ||
| "node": ">= 22" | ||
| }, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,7 @@ const { MongoClientInterface } = require('arsenal').storage.metadata.mongoclient | |
| const { Long } = require('mongodb'); | ||
| const { errors, constants } = require('arsenal'); | ||
| const async = require('async'); | ||
| const { promisify } = require('util'); | ||
| const { validStorageMetricLevels } = require('../CountItems/utils/constants'); | ||
| const getLocationConfig = require('./locationConfig'); | ||
| const monitoring = require('./monitoring'); | ||
|
|
@@ -52,35 +53,6 @@ const baseMetricsObject = { | |
| }; | ||
|
|
||
| class S3UtilsMongoClient extends MongoClientInterface { | ||
| /** | ||
| * Get the list of buckets and their location dates | ||
| * @param {object} log - Werelogs logger | ||
| * @returns {object} - Object with bucket names as keys | ||
| * and their creation dates as values | ||
| */ | ||
| async _getUsersBucketCreationDates(log) { | ||
| let cursorUsersBucketCreationDates; | ||
| try { | ||
| cursorUsersBucketCreationDates = await this.getCollection(USERSBUCKET).find({}, { | ||
| projection: { | ||
| 'value.creationDate': 1, | ||
| }, | ||
| }); | ||
| const usersBucketCreationDatesArray = await cursorUsersBucketCreationDates.toArray(); | ||
| return usersBucketCreationDatesArray | ||
| .reduce((map, obj) => ({ ...map, [obj._id]: obj.value.creationDate }), {}); | ||
| } catch (err) { | ||
| log.error('Failed to read __usersbucket collection', { | ||
| method: 'getUsersBucketCreationDates', | ||
| errDetails: { ...err }, | ||
| errorString: err.toString(), | ||
| }); | ||
| return null; | ||
| } finally { | ||
| await cursorUsersBucketCreationDates.close(); | ||
| } | ||
| } | ||
|
|
||
| async updateInflightDeltas(allMetrics, log) { | ||
| let cursor; | ||
| try { | ||
|
|
@@ -100,13 +72,11 @@ class S3UtilsMongoClient extends MongoClientInterface { | |
|
|
||
| const inflights = await cursor.toArray(); | ||
| // convert inflights to a map with _id: usedCapacity._inflight | ||
| const inflightsMap = inflights.reduce((map, obj) => { | ||
| const inflightLong = obj.usedCapacity?._inflight || 0n; | ||
| return { | ||
| ...map, | ||
| [obj._id]: inflightLong, | ||
| }; | ||
| }, {}); | ||
| const inflightsMap = {}; | ||
francoisferrand marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| for (const inflight of inflights) { | ||
| const inflightValue = inflight.usedCapacity?._inflight || 0n; | ||
| inflightsMap[inflight._id] = inflightValue; | ||
| } | ||
|
|
||
| const accountInflights = {}; | ||
| allMetrics.forEach(entry => { | ||
|
|
@@ -201,22 +171,21 @@ class S3UtilsMongoClient extends MongoClientInterface { | |
|
|
||
| const locationConfig = getLocationConfig(log); | ||
|
|
||
| const usersBucketCreationDatesMap = await this._getUsersBucketCreationDates(log); | ||
| let bucketCreationDate; | ||
francoisferrand marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| const getUsersBucketCreationDateAsync = promisify(this.getUsersBucketCreationDate).bind(this); | ||
| try { | ||
| bucketCreationDate = await getUsersBucketCreationDateAsync(bucketInfo.getOwner(), bucketName, log); | ||
| } catch (err) { | ||
| return callback(errors.InternalError); | ||
| } | ||
|
|
||
| const bucketStatus = bucketInfo.getVersioningConfiguration(); | ||
| const isVer = (bucketStatus && (bucketStatus.Status === 'Enabled' | ||
| || bucketStatus.Status === 'Suspended')); | ||
|
|
||
| if (!usersBucketCreationDatesMap) { | ||
| return callback(errors.InternalError); | ||
| } | ||
|
|
||
| const bucketDate = usersBucketCreationDatesMap[`${bucketInfo.getOwner()}${constants.splitter}${bucketName}`]; | ||
| if (bucketDate) { | ||
| bucketKey = `bucket_${bucketName}_${new Date(bucketDate).getTime()}`; | ||
| if (bucketKey) { | ||
| inflightsPreScan = await this.readStorageConsumptionInflights(bucketKey, log); | ||
| } | ||
| if (bucketCreationDate) { | ||
| bucketKey = `bucket_${bucketName}_${new Date(bucketCreationDate).getTime()}`; | ||
| inflightsPreScan = await this.readStorageConsumptionInflights(bucketKey, log); | ||
| } | ||
|
|
||
| let startCursorDate = new Date(); | ||
|
|
@@ -243,7 +212,7 @@ class S3UtilsMongoClient extends MongoClientInterface { | |
| bucketName, | ||
| bucketInfo, | ||
| entry, | ||
| usersBucketCreationDatesMap[`${entry.value['owner-id']}${constants.splitter}${bucketName}`], | ||
| bucketCreationDate, | ||
| isTransient, | ||
| locationConfig, | ||
| { | ||
|
|
@@ -1022,6 +991,19 @@ class S3UtilsMongoClient extends MongoClientInterface { | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * Read the bucket creation date from the `__usersbucket` collection. | ||
| * | ||
| * If the bucket entry is missing or does not contain a creation date, | ||
| * the function doesn't throw an error, and instead | ||
| * invoke the callback with no data `(null, undefined)`. | ||
| * | ||
| * @param {string} ownerId - Bucket owner canonical ID | ||
| * @param {string} bucketName - Bucket name | ||
| * @param {Object} log - Logger | ||
| * @param {Function} cb - Node-style callback: `(err, creationDate)` | ||
| * @returns {void} | ||
| */ | ||
| async getUsersBucketCreationDate(ownerId, bucketName, log, cb) { | ||
| try { | ||
| const usersBucketCol = this.getCollection(USERSBUCKET); | ||
|
|
@@ -1033,15 +1015,17 @@ class S3UtilsMongoClient extends MongoClientInterface { | |
| }, | ||
| }); | ||
| if (!res || !res.value || !res.value.creationDate) { | ||
| log.error('bucket entry not found in __usersbucket', { | ||
| log.warn('bucket entry not found in __usersbucket', { | ||
| method: 'getUsersBucketCreationDate', | ||
| bucketName, | ||
| ownerId, | ||
| }); | ||
| return cb(new Error('Bucket entry not found')); | ||
| return cb(null, undefined); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are we ignoring error now?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe need a comment explaining the behavior of this function on "not found" (in function's doc header?)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah I changed it following your recommandations, as it is easier to handle the "no data case" like this |
||
| } | ||
| return cb(null, res.value.creationDate); | ||
| } catch (err) { | ||
| log.error('failed to read bucket entry from __usersbucket', { | ||
| method: 'getUsersBucketCreationDate', | ||
| bucketName, | ||
| ownerId, | ||
| errDetails: { ...err }, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The real problem, found by Maël : It's not even that we are querying all the buckets when we only need one (the perf in our test are still ok when doing this), but it's the reduce with spread operator : making a copy of the map on each iterations. Changing this from reduce to a simple loop reduce script run time from a few minutes to 2 seconds -_-
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
btw, this function isn't used anymore, because as it turns out later, we don't need to query all buckets info, but only the info of a specific bucket and we have its name, so we can use another function for that.
I still kept this function, it's associated with tests, don't wanna touch too many things, anyways we are supposed to try to replace this script with SUR