MongoDB – Skewed Read Load on Mongo Replica Set

load-balancing, mongodb

I have set up a MongoDB replica set with one primary and two secondaries. The problem I am facing is that reads from the application servers, which connect using the replica-set connection URL, invariably go to only one secondary, causing a huge skew in read load between the two secondaries.

Because of this skew, one server is constrained for resources while the resources on the other are wasted.
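
For reference, the replica-set connection URL the application servers use is of roughly this shape (hosts as in the rs.status() output below; the read-preference option shown here is illustrative, not my exact configuration):

mongodb://1.1.1.1:27017,2.2.2.2:27017,3.3.3.3:27017/?replicaSet=rs0&readPreference=secondary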

[Screenshot: Mongo Monitoring Dashboard]

rs.status()

{
    "set" : "rs0",
    "date" : ISODate("2020-09-08T19:39:20.394Z"),
    "myState" : 1,
    "term" : NumberLong(16),
    "syncingTo" : "",
    "syncSourceHost" : "",
    "syncSourceId" : -1,
    "heartbeatIntervalMillis" : NumberLong(2000),
    "majorityVoteCount" : 2,
    "writeMajorityCount" : 2,
    "optimes" : {
        "lastCommittedOpTime" : {
            "ts" : Timestamp(1599593958, 2042),
            "t" : NumberLong(16)
        },
        "lastCommittedWallTime" : ISODate("2020-09-08T19:39:18.908Z"),
        "readConcernMajorityOpTime" : {
            "ts" : Timestamp(1599593958, 2042),
            "t" : NumberLong(16)
        },
        "readConcernMajorityWallTime" : ISODate("2020-09-08T19:39:18.908Z"),
        "appliedOpTime" : {
            "ts" : Timestamp(1599593959, 1176),
            "t" : NumberLong(16)
        },
        "durableOpTime" : {
            "ts" : Timestamp(1599593958, 2042),
            "t" : NumberLong(16)
        },
        "lastAppliedWallTime" : ISODate("2020-09-08T19:39:19.138Z"),
        "lastDurableWallTime" : ISODate("2020-09-08T19:39:18.908Z")
    },
    "lastStableRecoveryTimestamp" : Timestamp(1599593936, 300),
    "lastStableCheckpointTimestamp" : Timestamp(1599593936, 300),
    "electionCandidateMetrics" : {
        "lastElectionReason" : "priorityTakeover",
        "lastElectionDate" : ISODate("2020-08-11T17:18:08.040Z"),
        "electionTerm" : NumberLong(16),
        "lastCommittedOpTimeAtElection" : {
            "ts" : Timestamp(1597166288, 246),
            "t" : NumberLong(15)
        },
        "lastSeenOpTimeAtElection" : {
            "ts" : Timestamp(1597166288, 246),
            "t" : NumberLong(15)
        },
        "numVotesNeeded" : 2,
        "priorityAtElection" : 2,
        "electionTimeoutMillis" : NumberLong(10000),
        "priorPrimaryMemberId" : 5,
        "targetCatchupOpTime" : {
            "ts" : Timestamp(1597166288, 394),
            "t" : NumberLong(15)
        },
        "numCatchUpOps" : NumberLong(148),
        "newTermStartDate" : ISODate("2020-08-11T17:18:08.074Z"),
        "wMajorityWriteAvailabilityDate" : ISODate("2020-08-11T17:18:10.782Z")
    },
    "members" : [
        {
            "_id" : 3,
            "name" : "1.1.1.1:27017",
            "health" : 1,
            "state" : 1,
            "stateStr" : "PRIMARY",
            "uptime" : 2427845,
            "optime" : {
                "ts" : Timestamp(1599593959, 1176),
                "t" : NumberLong(16)
            },
            "optimeDate" : ISODate("2020-09-08T19:39:19Z"),
            "syncingTo" : "",
            "syncSourceHost" : "",
            "syncSourceId" : -1,
            "infoMessage" : "",
            "electionTime" : Timestamp(1597166288, 383),
            "electionDate" : ISODate("2020-08-11T17:18:08Z"),
            "configVersion" : 32,
            "self" : true,
            "lastHeartbeatMessage" : ""
        },
        {
            "_id" : 5,
            "name" : "3.3.3.3:27017",
            "health" : 1,
            "state" : 2,
            "stateStr" : "SECONDARY",
            "uptime" : 3672,
            "optime" : {
                "ts" : Timestamp(1599593954, 3378),
                "t" : NumberLong(16)
            },
            "optimeDurable" : {
                "ts" : Timestamp(1599593954, 3378),
                "t" : NumberLong(16)
            },
            "optimeDate" : ISODate("2020-09-08T19:39:14Z"),
            "optimeDurableDate" : ISODate("2020-09-08T19:39:14Z"),
            "lastHeartbeat" : ISODate("2020-09-08T19:39:19.238Z"),
            "lastHeartbeatRecv" : ISODate("2020-09-08T19:39:20.261Z"),
            "pingMs" : NumberLong(0),
            "lastHeartbeatMessage" : "",
            "syncingTo" : "1.1.1.1:27017",
            "syncSourceHost" : "1.1.1.1:27017",
            "syncSourceId" : 3,
            "infoMessage" : "",
            "configVersion" : 32
        },
        {
            "_id" : 6,
            "name" : "2.2.2.2:27017",
            "health" : 1,
            "state" : 2,
            "stateStr" : "SECONDARY",
            "uptime" : 3341,
            "optime" : {
                "ts" : Timestamp(1599593957, 2190),
                "t" : NumberLong(16)
            },
            "optimeDurable" : {
                "ts" : Timestamp(1599593957, 2190),
                "t" : NumberLong(16)
            },
            "optimeDate" : ISODate("2020-09-08T19:39:17Z"),
            "optimeDurableDate" : ISODate("2020-09-08T19:39:17Z"),
            "lastHeartbeat" : ISODate("2020-09-08T19:39:18.751Z"),
            "lastHeartbeatRecv" : ISODate("2020-09-08T19:39:20.078Z"),
            "pingMs" : NumberLong(0),
            "lastHeartbeatMessage" : "",
            "syncingTo" : "1.1.1.1:27017",
            "syncSourceHost" : "1.1.1.1:27017",
            "syncSourceId" : 3,
            "infoMessage" : "",
            "configVersion" : 32
        }
    ],
    "ok" : 1,
    "$clusterTime" : {
        "clusterTime" : Timestamp(1599593959, 1329),
        "signature" : {
            "hash" : BinData(0,"dfdfdggjhkljoj+mvY8="),
            "keyId" : NumberLong("897987897897987")
        }
    },
    "operationTime" : Timestamp(1599593959, 1176)
}

Please help me here: is this behaviour normally expected from a MongoDB replica set?

Best Answer

For a secondary read preference, the server selection algorithm finds the eligible node with the lowest round-trip time and then randomly picks one of the nodes whose latency is within 15 milliseconds (the default localThresholdMS) of that minimum.

If the RNG is good and the sample size is large enough, the load will usually balance out, but not always. In particular, if one secondary's measured round-trip time stays more than 15 ms above the other's, it will effectively never be selected, which would produce exactly the kind of skew you are seeing.
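
To make the mechanism concrete, here is a rough sketch of that selection logic in Python (not the driver's actual implementation; the host names and round-trip times are hypothetical):

import random

LOCAL_THRESHOLD_MS = 15  # default size of the latency window

def pick_secondary(rtt_by_host):
    # Keep only hosts within LOCAL_THRESHOLD_MS of the fastest one,
    # then choose uniformly at random among them.
    fastest = min(rtt_by_host.values())
    candidates = [host for host, rtt in rtt_by_host.items()
                  if rtt <= fastest + LOCAL_THRESHOLD_MS]
    return random.choice(candidates)

# Hypothetical RTTs: 3.3.3.3 is 20 ms slower, so it falls outside the
# window and is never chosen, even though it is a healthy secondary.
print(pick_secondary({"2.2.2.2:27017": 1, "3.3.3.3:27017": 21}))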

If you need to ensure strictly balanced reads across the secondaries, you will probably have to write your own server selection code and connect directly to the specific node it chooses, instead of using the replica-set connection string for secondary reads.
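
A minimal sketch of that approach with PyMongo, assuming the application picks the host itself (the host, database, and collection names here are hypothetical, and the directConnection option requires a reasonably recent driver version):

from pymongo import MongoClient

# Host chosen by your own balancing logic, e.g. round-robin over the
# secondaries listed in rs.status().
chosen_host = "3.3.3.3:27017"

# Connect directly to that one node instead of letting the driver run
# replica-set server selection.
client = MongoClient(
    "mongodb://" + chosen_host + "/?directConnection=true",
    readPreference="secondaryPreferred",  # allow reads on a non-primary
)

doc = client.mydb.mycoll.find_one()

Keep in mind that a direct connection bypasses the driver's automatic failover, so your own code also has to notice when the chosen node becomes unavailable or is elected primary.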