Skip to content

Union-Find (Disjoint Set Union) Pattern

Overview

Data structure to track elements partitioned into disjoint sets. Supports near-constant time union and find operations with path compression and union by rank.

When to Use

  • Finding connected components
  • Detecting cycles in undirected graph
  • Kruskal's minimum spanning tree
  • Grouping/clustering problems
  • Dynamic connectivity
  • Checking if nodes are in same group

Union-Find Operations

Template Code

Basic Union-Find with Optimizations

class UnionFind {
    private int[] parent;
    private int[] rank;
    private int count;  // Number of components

    public UnionFind(int n) {
        parent = new int[n];
        rank = new int[n];
        count = n;

        for (int i = 0; i < n; i++) {
            parent[i] = i;
            rank[i] = 0;
        }
    }

    // Find with path compression
    public int find(int x) {
        if (parent[x] != x) {
            parent[x] = find(parent[x]);  // Path compression
        }
        return parent[x];
    }

    // Union by rank
    public boolean union(int x, int y) {
        int rootX = find(x);
        int rootY = find(y);

        if (rootX == rootY) {
            return false;  // Already connected
        }

        // Attach smaller tree under larger tree
        if (rank[rootX] < rank[rootY]) {
            parent[rootX] = rootY;
        } else if (rank[rootX] > rank[rootY]) {
            parent[rootY] = rootX;
        } else {
            parent[rootY] = rootX;
            rank[rootX]++;
        }

        count--;
        return true;
    }

    public boolean connected(int x, int y) {
        return find(x) == find(y);
    }

    public int getCount() {
        return count;
    }
}

Number of Connected Components

public int countComponents(int n, int[][] edges) {
    UnionFind uf = new UnionFind(n);

    for (int[] edge : edges) {
        uf.union(edge[0], edge[1]);
    }

    return uf.getCount();
}

Number of Islands (Union-Find Approach)

public int numIslands(char[][] grid) {
    if (grid == null || grid.length == 0) return 0;

    int m = grid.length, n = grid[0].length;
    UnionFind uf = new UnionFind(m * n);
    int water = 0;

    int[][] directions = {{0,1}, {1,0}};

    for (int i = 0; i < m; i++) {
        for (int j = 0; j < n; j++) {
            if (grid[i][j] == '0') {
                water++;
            } else {
                for (int[] dir : directions) {
                    int ni = i + dir[0], nj = j + dir[1];
                    if (ni < m && nj < n && grid[ni][nj] == '1') {
                        uf.union(i * n + j, ni * n + nj);
                    }
                }
            }
        }
    }

    return uf.getCount() - water;
}

Graph Valid Tree

public boolean validTree(int n, int[][] edges) {
    // Tree: n nodes, n-1 edges, connected, no cycles
    if (edges.length != n - 1) return false;

    UnionFind uf = new UnionFind(n);

    for (int[] edge : edges) {
        // If already connected, adding edge creates cycle
        if (!uf.union(edge[0], edge[1])) {
            return false;
        }
    }

    return uf.getCount() == 1;  // Must be connected
}

Redundant Connection (Find Cycle Edge)

public int[] findRedundantConnection(int[][] edges) {
    int n = edges.length;
    UnionFind uf = new UnionFind(n + 1);  // 1-indexed

    for (int[] edge : edges) {
        if (!uf.union(edge[0], edge[1])) {
            return edge;  // This edge creates cycle
        }
    }

    return new int[0];
}

Accounts Merge

public List<List<String>> accountsMerge(List<List<String>> accounts) {
    UnionFind uf = new UnionFind(accounts.size());
    Map<String, Integer> emailToIndex = new HashMap<>();

    // Union accounts with same email
    for (int i = 0; i < accounts.size(); i++) {
        for (int j = 1; j < accounts.get(i).size(); j++) {
            String email = accounts.get(i).get(j);

            if (emailToIndex.containsKey(email)) {
                uf.union(i, emailToIndex.get(email));
            } else {
                emailToIndex.put(email, i);
            }
        }
    }

    // Group emails by root account
    Map<Integer, TreeSet<String>> rootToEmails = new HashMap<>();
    for (int i = 0; i < accounts.size(); i++) {
        int root = uf.find(i);
        rootToEmails.putIfAbsent(root, new TreeSet<>());

        for (int j = 1; j < accounts.get(i).size(); j++) {
            rootToEmails.get(root).add(accounts.get(i).get(j));
        }
    }

    // Build result
    List<List<String>> result = new ArrayList<>();
    for (int root : rootToEmails.keySet()) {
        List<String> account = new ArrayList<>();
        account.add(accounts.get(root).get(0));  // Name
        account.addAll(rootToEmails.get(root));  // Sorted emails
        result.add(account);
    }

    return result;
}

Longest Consecutive Sequence

public int longestConsecutive(int[] nums) {
    if (nums.length == 0) return 0;

    Map<Integer, Integer> numToIndex = new HashMap<>();
    UnionFind uf = new UnionFind(nums.length);

    for (int i = 0; i < nums.length; i++) {
        if (numToIndex.containsKey(nums[i])) continue;  // Skip duplicates

        numToIndex.put(nums[i], i);

        if (numToIndex.containsKey(nums[i] - 1)) {
            uf.union(i, numToIndex.get(nums[i] - 1));
        }
        if (numToIndex.containsKey(nums[i] + 1)) {
            uf.union(i, numToIndex.get(nums[i] + 1));
        }
    }

    // Count elements in each component
    Map<Integer, Integer> rootToSize = new HashMap<>();
    int maxLen = 0;

    for (int i = 0; i < nums.length; i++) {
        if (!numToIndex.containsKey(nums[i])) continue;
        int root = uf.find(i);
        int size = rootToSize.getOrDefault(root, 0) + 1;
        rootToSize.put(root, size);
        maxLen = Math.max(maxLen, size);
    }

    return maxLen;
}

Union-Find with Size (for Largest Component)

class UnionFindWithSize {
    private int[] parent;
    private int[] size;

    public UnionFindWithSize(int n) {
        parent = new int[n];
        size = new int[n];
        for (int i = 0; i < n; i++) {
            parent[i] = i;
            size[i] = 1;
        }
    }

    public int find(int x) {
        if (parent[x] != x) {
            parent[x] = find(parent[x]);
        }
        return parent[x];
    }

    public void union(int x, int y) {
        int rootX = find(x);
        int rootY = find(y);

        if (rootX != rootY) {
            // Union by size
            if (size[rootX] < size[rootY]) {
                parent[rootX] = rootY;
                size[rootY] += size[rootX];
            } else {
                parent[rootY] = rootX;
                size[rootX] += size[rootY];
            }
        }
    }

    public int getSize(int x) {
        return size[find(x)];
    }
}

Earliest Time When All Become Friends

public int earliestAcq(int[][] logs, int n) {
    // Sort by timestamp
    Arrays.sort(logs, (a, b) -> a[0] - b[0]);

    UnionFind uf = new UnionFind(n);

    for (int[] log : logs) {
        uf.union(log[1], log[2]);
        if (uf.getCount() == 1) {
            return log[0];
        }
    }

    return -1;
}

Time & Space Complexity

Operation Time (with optimizations) Amortized
Find O(α(n)) Nearly O(1)
Union O(α(n)) Nearly O(1)
Building O(n) O(n)

Where α(n) is the inverse Ackermann function, effectively constant.

Common Interview Questions

  1. Number of Connected Components - Count disjoint sets
  2. Number of Islands - Grid connectivity
  3. Graph Valid Tree - Check n-1 edges, no cycle, connected
  4. Redundant Connection - Find edge that creates cycle
  5. Accounts Merge - Merge by shared emails
  6. Longest Consecutive Sequence - Union adjacent numbers
  7. Friend Circles - Count friend groups
  8. Sentence Similarity II - Transitive word similarity
  9. Smallest String With Swaps - Group swappable indices
  10. Satisfiability of Equality Equations - Process == before !=
  11. Regions Cut By Slashes - Grid subdivision
  12. Most Stones Removed - Max removable (n - components)

Tips & Tricks

  • Always use path compression in find()
  • Union by rank or size for balanced trees
  • Initialize parent[i] = i
  • Check if already connected before union for cycle detection
  • Track component count for "number of groups" problems
  • Can track additional info: size, min, max per component
  • For 2D grids, use i * cols + j as linear index