I have a collection filled with documents like this:
{ data: 11, version: "0.0.32" }
and some of them have a "test" suffix in the version:
{ data: 55, version: "0.0.42-test" }
The version field has different values, but it always conforms to the pattern 0.0.XXX. I want to update the documents so that they look like this:
{ data: 11, version: 32 }
and, for the suffixed version (for test documents, the version should be negative):
{ data: 55, version: -42 }
The collection with these documents is used by our critical system, which needs to be turned off while updating the data, so I want the update/change to be as fast as possible. There are 66_000_000 documents in the collection, and it's 100GB in size.
Which type of MongoDB operation would be the most efficient one?
The most efficient way, in the upcoming release of MongoDB as of this writing, is to use the $split operator to split our string as shown here, then assign the last element in the array to a variable using the $let variable operator and the $arrayElemAt operator.
Next, we use the $switch operator to perform logical condition processing, or a case statement, against that variable.
The condition here is $gt, which returns true if the value contains "test"; in that case, in the "in" expression, we split that string and return the $concat-enated value of "-" and the first element of the newly computed array. If the condition evaluates to false, we just return the variable.
Of course, in our case statement we use $indexOfCP, which returns -1 if there are no occurrences of "test".
// Reduce "version" to its last dot-separated segment (e.g. "0.0.32" -> "32").
// When that segment contains "test" (e.g. "42-test"), emit "-" followed by the
// part before the hyphen ("-42"); otherwise emit the segment unchanged.
// The result is still a string at this point.
let cursor = db.collection.aggregate([
  {
    "$project": {
      "data": 1,
      "version": {
        "$let": {
          "vars": {
            // Index -1 selects the last element of the split array.
            "v": { "$arrayElemAt": [ { "$split": [ "$version", "." ] }, -1 ] }
          },
          "in": {
            "$switch": {
              "branches": [
                {
                  // $indexOfCP returns -1 when "test" does not occur in $$v.
                  "case": { "$gt": [ { "$indexOfCP": [ "$$v", "test" ] }, -1 ] },
                  "then": {
                    "$concat": [
                      "-",
                      { "$arrayElemAt": [ { "$split": [ "$$v", "-" ] }, 0 ] }
                    ]
                  }
                }
              ],
              "default": "$$v"
            }
          }
        }
      }
    }
  }
]);
The aggregation query produces something like this:
{ "_id" : ObjectId("57a98773cbbd42a2156260d8"), "data" : 11, "version" : "32" } { "_id" : ObjectId("57a98773cbbd42a2156260d9"), "data" : 55, "version" : "-42" }
As you can see, the "version" field data is a string. If the data type of that field does not matter, then you can simply use the $out aggregation pipeline stage operator to write the result into a new collection or to replace your collection.
{ "out": "collection" }
If you need to convert your data to a floating point number, then the only way to do this, because MongoDB doesn't provide a way to do type conversion out of the box except for integer to string, is to iterate the aggregation Cursor object, convert your value using parseFloat or Number, and then update your documents using the $set operator and the bulkWrite() method for maximum efficiency.
// Walk the aggregation cursor and queue one updateOne per document, converting
// the string "version" to a number. Operations are sent in batches of 1000.
let requests = [];

cursor.forEach(doc => {
  requests.push({
    "updateOne": {
      "filter": { "_id": doc._id },
      "update": {
        "$set": { "data": doc.data, "version": parseFloat(doc.version) }
      }
    }
  });

  if (requests.length === 1000) {
    // Execute per 1000 operations and re-init the queue.
    db.collection.bulkWrite(requests);
    requests = [];
  }
});

// Flush whatever is left in the queue after the last full batch.
if (requests.length > 0) {
  db.collection.bulkWrite(requests);
}
While the aggregation query will work in MongoDB 3.4 or newer, our best bet from MongoDB 3.2 backwards is mapReduce with the bulkWrite() method.
// Compute the new version string for every document with an inline mapReduce.
// The map function emits, per _id, the third dot-separated segment of
// "version"; when that segment contains a hyphen (e.g. "42-test") it emits
// "-" plus the segment with all non-digit characters removed ("-42").
var results = db.collection.mapReduce(
  function() {
    var v = this.version.split(".")[2];
    // \D+ strips the non-digit suffix so only the number remains.
    emit(this._id, v.indexOf("-") > -1 ? "-" + v.replace(/\D+/g, '') : v);
  },
  // Each _id is emitted once, so the reduce function has nothing to combine.
  function(key, value) {},
  { "out": { "inline": 1 } }
)["results"];
results looks like this:
[ { "_id" : ObjectId("57a98773cbbd42a2156260d8"), "value" : "32" }, { "_id" : ObjectId("57a98773cbbd42a2156260d9"), "value" : "-42" } ]
From here you can use the previous .forEach loop to update your documents.
From MongoDB 2.6 to 3.0 you will need to use the now deprecated Bulk() API and its associated methods, as shown in my answer here.
Comments
Post a Comment