Cache Locality
Definition

Types of Locality

Cache-Friendly vs Cache-Unfriendly

Data Structure Impact

Code Examples
// Example 1: Matrix traversal
public class MatrixSum {
// Cache-friendly: ~10x faster for large matrices
public long sumRowMajor(int[][] matrix) {
long sum = 0;
for (int i = 0; i < matrix.length; i++) {
for (int j = 0; j < matrix[0].length; j++) {
sum += matrix[i][j]; // Sequential access
}
}
return sum;
}
// Cache-unfriendly
public long sumColMajor(int[][] matrix) {
long sum = 0;
for (int j = 0; j < matrix[0].length; j++) {
for (int i = 0; i < matrix.length; i++) {
sum += matrix[i][j]; // Strided access
}
}
return sum;
}
}
// Example 2: Array of Structs vs Struct of Arrays
// Array of Structs (AoS) - cache-friendly when accessing all fields
class Particle {
float x, y, z; // Position
float vx, vy, vz; // Velocity
}
Particle[] particles = new Particle[1000];
// Process all fields of each particle - good cache usage
for (Particle p : particles) {
p.x += p.vx;
p.y += p.vy;
p.z += p.vz;
}
// Struct of Arrays (SoA) - better when accessing one field at a time
class ParticleSystem {
float[] x, y, z; // Positions contiguous
float[] vx, vy, vz; // Velocities contiguous
}
// Process only x coordinates - excellent cache usage
for (int i = 0; i < n; i++) {
x[i] += vx[i]; // Sequential access
}
// Example 3: Loop tiling for cache optimization
public class MatrixMultiply {
// Naive: Poor cache usage for large matrices
public void multiplyNaive(int[][] A, int[][] B, int[][] C, int n) {
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
for (int k = 0; k < n; k++)
C[i][j] += A[i][k] * B[k][j];
}
// Tiled: Better cache usage
public void multiplyTiled(int[][] A, int[][] B, int[][] C, int n) {
int TILE = 64; // Fits in L1 cache
for (int ii = 0; ii < n; ii += TILE)
for (int jj = 0; jj < n; jj += TILE)
for (int kk = 0; kk < n; kk += TILE)
for (int i = ii; i < Math.min(ii+TILE, n); i++)
for (int j = jj; j < Math.min(jj+TILE, n); j++)
for (int k = kk; k < Math.min(kk+TILE, n); k++)
C[i][j] += A[i][k] * B[k][j];
}
}
Optimization Techniques

False Sharing

Tips & Tricks
