Skip to content

MongoDB

What is MongoDB?

MongoDB is a document-oriented NoSQL database that stores data in flexible, JSON-like documents (BSON). It's designed for scalability, high availability, and developer productivity.

  • Type: Document Database (NoSQL)
  • Written in: C++
  • License: SSPL (Server Side Public License)
  • Protocol: MongoDB Wire Protocol over TCP
  • Default Port: 27017
  • Query Language: MQL (MongoDB Query Language)
  • Managed Service: MongoDB Atlas

Core Concepts

Document Model

MongoDB Architecture

Document Example:

{
    "_id": ObjectId("507f1f77bcf86cd799439011"),
    "name": "John Doe",
    "email": "john@example.com",
    "address": {
        "street": "123 Main St",
        "city": "New York",
        "zip": "10001"
    },
    "orders": [
        { "product": "iPhone", "price": 999 },
        { "product": "Case", "price": 29 }
    ],
    "tags": ["premium", "verified"],
    "created_at": ISODate("2024-01-15T10:30:00Z")
}

BSON Types

// Common BSON types
{
    "_id": ObjectId("..."),           // 12-byte unique identifier
    "string": "text",                 // UTF-8 string
    "number": 42,                     // 64-bit float (default)
    "int32": NumberInt(42),           // 32-bit integer
    "int64": NumberLong(42),          // 64-bit integer
    "decimal": NumberDecimal("9.99"), // 128-bit decimal
    "boolean": true,                  // Boolean
    "date": ISODate("2024-01-15"),   // UTC datetime
    "timestamp": Timestamp(),         // Internal timestamp
    "null": null,                     // Null value
    "array": [1, 2, 3],              // Array
    "object": { "nested": "doc" },   // Embedded document
    "binary": BinData(0, "..."),     // Binary data
    "regex": /pattern/i,              // Regular expression
    "uuid": UUID("...")               // UUID
}

Architecture

Replica Set

MongoDB Replica Set


Core Features

MongoDB offers:

  • Flexible schema (schemaless)
  • Rich query language (MQL)
  • Secondary indexes (including compound, geospatial, text)
  • Aggregation framework
  • Replica sets (high availability)
  • Horizontal scaling (sharding)
  • Multi-document ACID transactions (4.0+)
  • Change streams (real-time data changes)
  • Full-text search (Atlas Search)
  • Time-series collections (5.0+)
  • Queryable encryption (7.0+)


CRUD Operations

Create

// Insert one document
db.users.insertOne({
    name: "John Doe",
    email: "john@example.com",
    age: 30
});

// Insert multiple documents
db.users.insertMany([
    { name: "Jane", email: "jane@example.com" },
    { name: "Bob", email: "bob@example.com" }
]);

// With write concern
db.users.insertOne(
    { name: "Alice" },
    { writeConcern: { w: "majority", wtimeout: 5000 } }
);

Read

// Find one
db.users.findOne({ email: "john@example.com" });

// Find all matching
db.users.find({ age: { $gte: 18 } });

// With projection (select fields)
db.users.find(
    { status: "active" },
    { name: 1, email: 1, _id: 0 }
);

// Sorting and limiting
db.users.find()
    .sort({ created_at: -1 })
    .skip(20)
    .limit(10);

// Count
db.users.countDocuments({ status: "active" });

Update

// Update one
db.users.updateOne(
    { email: "john@example.com" },
    { $set: { name: "John Smith" } }
);

// Update multiple
db.users.updateMany(
    { status: "pending" },
    { $set: { status: "active" } }
);

// Upsert (insert if not exists)
db.users.updateOne(
    { email: "new@example.com" },
    { $set: { name: "New User" } },
    { upsert: true }
);

// Replace entire document
db.users.replaceOne(
    { _id: ObjectId("...") },
    { name: "Replaced", email: "replaced@example.com" }
);

// Update operators
db.users.updateOne(
    { _id: ObjectId("...") },
    {
        $set: { name: "Updated" },        // Set field
        $unset: { temp_field: "" },       // Remove field
        $inc: { login_count: 1 },         // Increment
        $push: { tags: "new_tag" },       // Add to array
        $pull: { tags: "old_tag" },       // Remove from array
        $addToSet: { tags: "unique_tag" } // Add if not exists
    }
);

Delete

// Delete one
db.users.deleteOne({ email: "john@example.com" });

// Delete many
db.users.deleteMany({ status: "inactive" });

// Delete all
db.users.deleteMany({});

Query Operators

Comparison

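// Operator reference only: a filter object cannot repeat a key (the last one wins),
// so use one of these conditions per field in a real query.
db.products.find({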
    price: { $eq: 100 },    // Equal
    price: { $ne: 100 },    // Not equal
    price: { $gt: 100 },    // Greater than
    price: { $gte: 100 },   // Greater than or equal
    price: { $lt: 100 },    // Less than
    price: { $lte: 100 },   // Less than or equal
    price: { $in: [100, 200, 300] },     // In array
    price: { $nin: [100, 200, 300] }     // Not in array
});

Logical

db.products.find({
    $and: [
        { price: { $gte: 100 } },
        { price: { $lte: 500 } }
    ]
});

db.products.find({
    $or: [
        { category: "electronics" },
        { category: "computers" }
    ]
});

db.products.find({
    price: { $not: { $gt: 100 } }
});

db.products.find({
    $nor: [
        { status: "discontinued" },
        { stock: 0 }
    ]
});

Element

db.products.find({
    discount: { $exists: true },    // Field exists
    price: { $type: "number" }      // Field type
});

Array

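// Operator reference only: the `tags` conditions below are alternatives.
// Repeating a key in one filter object keeps only the last one, so pick one per query.
db.products.find({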
    tags: "electronics",                      // Contains element
    tags: { $all: ["electronics", "sale"] },  // Contains all
    tags: { $size: 3 },                       // Array size
    "tags.0": "featured",                     // First element
    tags: { $elemMatch: { $gte: 10, $lt: 20 } } // Element match
});

Text Search

// Create text index first
db.articles.createIndex({ title: "text", body: "text" });

// Text search
db.articles.find({
    $text: { $search: "mongodb tutorial" }
});

// With score
db.articles.find(
    { $text: { $search: "mongodb" } },
    { score: { $meta: "textScore" } }
).sort({ score: { $meta: "textScore" } });

Aggregation Framework

Pipeline Stages

db.orders.aggregate([
    // $match - Filter documents (like WHERE)
    { $match: { status: "completed" } },

    // $project - Select/transform fields (like SELECT)
    { $project: {
        customer_id: 1,
        total: 1,
        year: { $year: "$order_date" }
    }},

    // $group - Group by field (like GROUP BY)
    { $group: {
        _id: "$customer_id",
        total_spent: { $sum: "$total" },
        order_count: { $sum: 1 },
        avg_order: { $avg: "$total" }
    }},

    // $sort - Sort results (like ORDER BY)
    { $sort: { total_spent: -1 } },

    // $limit - Limit results (like LIMIT)
    { $limit: 10 },

    // $skip - Skip results (like OFFSET)
    { $skip: 0 }
]);

Common Aggregation Patterns

// Lookup (JOIN)
db.orders.aggregate([
    { $lookup: {
        from: "users",
        localField: "user_id",
        foreignField: "_id",
        as: "user"
    }},
    { $unwind: "$user" }  // Flatten array
]);

// Faceted search
db.products.aggregate([
    { $facet: {
        "byCategory": [
            { $group: { _id: "$category", count: { $sum: 1 } } }
        ],
        "byPriceRange": [
            { $bucket: {
                groupBy: "$price",
                boundaries: [0, 100, 500, 1000],
                default: "1000+",
                output: { count: { $sum: 1 } }
            }}
        ],
        "topProducts": [
            { $sort: { sales: -1 } },
            { $limit: 5 }
        ]
    }}
]);

// Window functions (5.0+)
db.sales.aggregate([
    { $setWindowFields: {
        partitionBy: "$region",
        sortBy: { date: 1 },
        output: {
            running_total: {
                $sum: "$amount",
                window: { documents: ["unbounded", "current"] }
            },
            rank: {
                $rank: {}
            }
        }
    }}
]);

// Graph lookup (recursive)
db.employees.aggregate([
    { $match: { name: "CEO" } },
    { $graphLookup: {
        from: "employees",
        startWith: "$_id",
        connectFromField: "_id",
        connectToField: "manager_id",
        as: "reports",
        maxDepth: 3,
        depthField: "level"
    }}
]);

Aggregation Operators

// String
{ $concat: ["$first_name", " ", "$last_name"] }
{ $toUpper: "$name" }
{ $substr: ["$name", 0, 3] }

// Array
{ $size: "$items" }
{ $arrayElemAt: ["$items", 0] }
{ $filter: { input: "$items", cond: { $gte: ["$$this.qty", 10] } } }

// Date
{ $year: "$date" }
{ $month: "$date" }
{ $dayOfMonth: "$date" }
{ $dateToString: { format: "%Y-%m-%d", date: "$date" } }

// Conditional
{ $cond: { if: { $gte: ["$qty", 100] }, then: "bulk", else: "retail" } }
{ $ifNull: ["$description", "No description"] }
{ $switch: {
    branches: [
        { case: { $eq: ["$status", 1] }, then: "pending" },
        { case: { $eq: ["$status", 2] }, then: "complete" }
    ],
    default: "unknown"
}}

Indexing

Index Types

| Type | Use Case |
| --- | --- |
| Single Field | Queries on one field |
| Compound | Queries on multiple fields |
| Multikey | Array fields |
| Text | Full-text search |
| Geospatial (2dsphere) | Location queries |
| Hashed | Shard key distribution |
| TTL | Auto-expire documents |
| Wildcard | Dynamic field names |

Index Examples

// Single field index
db.users.createIndex({ email: 1 });  // 1 = ascending, -1 = descending

// Unique index
db.users.createIndex({ email: 1 }, { unique: true });

// Compound index
db.orders.createIndex({ user_id: 1, created_at: -1 });

// Partial index (index subset of documents)
db.orders.createIndex(
    { user_id: 1 },
    { partialFilterExpression: { status: "active" } }
);

// Sparse index (skip documents without field)
db.users.createIndex(
    { phone: 1 },
    { sparse: true }
);

// TTL index (auto-delete after time)
db.sessions.createIndex(
    { created_at: 1 },
    { expireAfterSeconds: 3600 }  // 1 hour
);

// Text index
db.articles.createIndex(
    { title: "text", body: "text" },
    { weights: { title: 10, body: 1 } }
);

// Geospatial index
db.locations.createIndex({ coordinates: "2dsphere" });

// Wildcard index (for dynamic fields)
db.products.createIndex({ "attributes.$**": 1 });

// Covered query index
db.users.createIndex({ email: 1, name: 1, status: 1 });
// Query can be answered entirely from index:
db.users.find(
    { email: "john@example.com" },
    { name: 1, status: 1, _id: 0 }
);

Index Analysis

// Explain query
db.users.find({ email: "john@example.com" }).explain("executionStats");

// Key metrics:
// - totalDocsExamined: Documents scanned
// - totalKeysExamined: Index entries scanned
// - executionTimeMillis: Query time
// - stage: COLLSCAN (bad), IXSCAN (good)

// List indexes
db.users.getIndexes();

// Drop index
db.users.dropIndex("email_1");

// Index stats
db.users.aggregate([
    { $indexStats: {} }
]);

Transactions

Multi-Document Transactions (4.0+)

// Start session
const session = db.getMongo().startSession();

try {
    session.startTransaction({
        readConcern: { level: "snapshot" },
        writeConcern: { w: "majority" }
    });

    const orders = session.getDatabase("shop").orders;
    const inventory = session.getDatabase("shop").inventory;

    // Create order
    orders.insertOne({
        user_id: userId,
        items: [{ product_id: productId, qty: 1 }],
        total: 99.99
    }, { session });

    // Decrement inventory
    const result = inventory.updateOne(
        { _id: productId, stock: { $gte: 1 } },
        { $inc: { stock: -1 } },
        { session }
    );

    if (result.modifiedCount === 0) {
        throw new Error("Insufficient stock");
    }

    session.commitTransaction();
} catch (error) {
    session.abortTransaction();
    throw error;
} finally {
    session.endSession();
}

Read/Write Concerns

// Write concerns
{ w: 1 }           // Acknowledged by primary
{ w: "majority" }  // Acknowledged by majority
{ w: 0 }           // Fire and forget (not recommended)
{ w: 3 }           // Acknowledged by 3 nodes
{ j: true }        // Written to journal

// Read concerns
"local"            // Most recent data (default)
"majority"         // Data acknowledged by majority
"linearizable"     // Reflects all successful majority writes
"snapshot"         // For multi-document transactions

Change Streams

// Watch collection changes
const changeStream = db.orders.watch();

changeStream.on("change", (change) => {
    console.log("Change detected:", change);
    // {
    //     operationType: "insert" | "update" | "delete" | "replace",
    //     fullDocument: { ... },
    //     documentKey: { _id: ... },
    //     updateDescription: { updatedFields: {}, removedFields: [] }
    // }
});

// With pipeline filter
const pipeline = [
    { $match: {
        "operationType": { $in: ["insert", "update"] },
        "fullDocument.status": "completed"
    }}
];
const changeStream = db.orders.watch(pipeline);

// Resume from token
const changeStream = db.orders.watch([], {
    resumeAfter: previousResumeToken
});

Sharding

Shard Key Selection

MongoDB Shard Key Selection

Sharding Setup

// Enable sharding on database
sh.enableSharding("mydb");

// Shard collection with hashed key
sh.shardCollection("mydb.users", { user_id: "hashed" });

// Shard collection with range key
sh.shardCollection("mydb.orders", { customer_id: 1, _id: 1 });

// Check sharding status
sh.status();

// View chunk distribution
db.orders.getShardDistribution();

Common Use Cases

1. Content Management

// Flexible article schema
db.articles.insertOne({
    _id: ObjectId(),
    title: "MongoDB Guide",
    slug: "mongodb-guide",
    author: {
        _id: ObjectId("..."),
        name: "John Doe"
    },
    content: "...",
    tags: ["mongodb", "database", "nosql"],
    metadata: {
        views: 1500,
        likes: 42,
        shares: 10
    },
    comments: [
        { user: "Alice", text: "Great article!", date: ISODate() }
    ],
    published_at: ISODate("2024-01-15"),
    status: "published"
});

2. Product Catalog

// Products with varying attributes
db.products.insertMany([
    {
        name: "iPhone 15",
        category: "electronics",
        price: 999,
        attributes: {
            color: "black",
            storage: "256GB",
            screen_size: "6.1"
        }
    },
    {
        name: "Running Shoes",
        category: "footwear",
        price: 120,
        attributes: {
            color: "blue",
            size: [8, 9, 10, 11],
            material: "mesh"
        }
    }
]);

// Wildcard index for attributes
db.products.createIndex({ "attributes.$**": 1 });

3. User Profiles

db.users.insertOne({
    _id: ObjectId(),
    email: "john@example.com",
    profile: {
        name: "John Doe",
        avatar_url: "https://...",
        bio: "Software Engineer"
    },
    preferences: {
        theme: "dark",
        notifications: {
            email: true,
            push: false
        }
    },
    connections: [
        ObjectId("..."),
        ObjectId("...")
    ],
    activity: {
        last_login: ISODate(),
        login_count: 150
    }
});

4. Real-time Analytics

// Pre-aggregated stats
db.page_stats.updateOne(
    {
        page_id: "homepage",
        date: new Date().toISOString().split('T')[0]  // "2024-01-15"
    },
    {
        $inc: {
            views: 1,
            "hourly_views.14": 1  // 2 PM
        }
    },
    { upsert: true }
);

5. Time-Series Data (5.0+)

// Time-series collection
db.createCollection("sensor_data", {
    timeseries: {
        timeField: "timestamp",
        metaField: "sensor_id",
        granularity: "seconds"
    },
    expireAfterSeconds: 86400 * 30  // 30 days TTL
});

// Insert measurements
db.sensor_data.insertMany([
    { sensor_id: "s1", timestamp: ISODate(), temperature: 22.5, humidity: 45 },
    { sensor_id: "s1", timestamp: ISODate(), temperature: 22.7, humidity: 44 }
]);

Trade-offs

| Pros | Cons |
| --- | --- |
| Flexible schema | No native joins ($lookup can be slow) |
| Horizontal scaling | RAM hungry (working set should fit in the WiredTiger cache) |
| Rich query language | SSPL license concerns |
| Aggregation framework | Eventual consistency on secondary reads |
| Built-in replication | Transaction overhead |
| Change streams | No referential integrity |
| Geospatial queries | Index size can be large |
| Developer productivity | Data duplication |

Performance Characteristics

| Metric | Typical Value |
| --- | --- |
| Read latency | 1-10ms |
| Write latency | 1-10ms |
| Throughput | 10,000+ ops/sec/node |
| Document size | 16MB max |
| Connections | Thousands |
| Index size | GB to TB |

MongoDB vs Alternatives

| Feature | MongoDB | PostgreSQL | DynamoDB | Cassandra |
| --- | --- | --- | --- | --- |
| Data Model | Document | Relational | Key-value | Wide-column |
| Schema | Flexible | Fixed | Flexible | Semi-fixed |
| Transactions | Multi-doc | Full ACID | Limited | Limited |
| Joins | $lookup | Native | No | No |
| Scaling | Sharding | Vertical | Automatic | Linear |
| Query | Rich MQL | SQL | Limited | CQL |
| Managed | Atlas | RDS | Native | Astra |

Best Practices

  1. Design for your queries - Embed related data, avoid $lookup
  2. Use appropriate indexes - Analyze with explain()
  3. Limit document size - Keep under 16MB, prefer smaller
  4. Use projection - Only fetch needed fields
  5. Batch operations - Use bulkWrite for multiple ops
  6. Set read/write concerns - Based on consistency needs
  7. Monitor with Atlas - Or use mongostat, mongotop
  8. Use connection pooling - Reuse connections
  9. Plan for sharding early - Choose shard key carefully
  10. Regular backups - mongodump or Atlas backup

Essential Commands

// Database operations
show dbs
use mydb
db.dropDatabase()

// Collection operations
show collections
db.createCollection("users")
db.users.drop()

// Stats
db.stats()
db.users.stats()
db.serverStatus()

// Profiling
db.setProfilingLevel(1, { slowms: 100 })
db.system.profile.find().sort({ ts: -1 }).limit(10)

// Users
db.createUser({
    user: "app",
    pwd: "password",
    roles: [{ role: "readWrite", db: "mydb" }]
})

// Backup/Restore (shell commands)
mongodump --db mydb --out /backup
mongorestore --db mydb /backup/mydb

// Monitoring
mongostat
mongotop

// Replica set status
rs.status()
rs.conf()

Java Driver Example

// Connection
MongoClient client = MongoClients.create("mongodb://localhost:27017");
MongoDatabase database = client.getDatabase("mydb");
MongoCollection<Document> users = database.getCollection("users");

// Insert
Document user = new Document("name", "John")
    .append("email", "john@example.com")
    .append("age", 30);
users.insertOne(user);

// Find
Document found = users.find(eq("email", "john@example.com")).first();

// Update
users.updateOne(
    eq("email", "john@example.com"),
    combine(set("name", "John Doe"), inc("age", 1))
);

// Delete
users.deleteOne(eq("email", "john@example.com"));

// Aggregation
List<Document> results = users.aggregate(Arrays.asList(
    match(eq("status", "active")),
    group("$department", sum("count", 1)),
    sort(descending("count"))
)).into(new ArrayList<>());

// Transactions
try (ClientSession session = client.startSession()) {
    session.startTransaction();
    try {
        orders.insertOne(session, orderDoc);
        inventory.updateOne(session, filter, update);
        session.commitTransaction();
    } catch (Exception e) {
        session.abortTransaction();
        throw e;
    }
}