ProbableOdyssey

Deep comparison of Python dictionaries

Imagine you have configuration data, API payloads, or test expectations, and you want to verify that a smaller, “subset” dictionary is fully contained within a larger “superset” dictionary, including all the nested dictionaries and lists that it may contain.

Python’s built-in tools like dict.items() and set operations are fantastic for shallow comparisons.

There are some built-in methods that spring to mind for this problem, such as comparing the item views with `subset.items() <= superset.items()`, and they’re perfect for flat, simple dictionaries:

While useful for simple dictionaries, this approach only checks the top level. If a value is another dictionary, the two inner dictionaries are compared for full equality rather than checked for containment. For example, { 'a': {'b': 1} } is not a subset of { 'a': {'b': 1, 'c': 2} } using these methods, because the inner dictionaries are not equal, even though one is contained in the other.

These methods are perfect for flat dictionaries but fall flat when dealing with nested data.

from typing import Any

def is_subset_dict(subset: dict[str, Any], superset: dict[str, Any]) -> bool:
    """Return True if 'subset' is a deep subset of 'superset'.

    Every key in 'subset' must exist in 'superset', and the paired values
    must match according to these rules:

    * dict vs. dict: the subset value must itself be a deep subset.
    * list vs. list: the subset list must not be longer than the superset
      list, and each item must match the superset item at the *same index*
      (positional, prefix-style matching). An empty subset list matches
      any superset list.
    * anything else (including mismatched container types): the values
      must compare equal with ``==``.

    If either argument is not a dictionary, the result is plain equality
    of the two arguments.
    """
    if not isinstance(subset, dict) or not isinstance(superset, dict):
        # If not both dictionaries, they must be strictly equal
        return subset == superset

    for key, value in subset.items():
        if key not in superset:
            return False
        if not _is_deep_subset(value, superset[key]):
            return False
    return True


def _is_deep_subset(value: Any, superset_value: Any) -> bool:
    """Check one subset value against the corresponding superset value."""
    if isinstance(value, dict) and isinstance(superset_value, dict):
        # Recurse for nested dictionaries
        return is_subset_dict(value, superset_value)

    if isinstance(value, list) and isinstance(superset_value, list):
        # A subset list can never contain more items than its superset
        if len(value) > len(superset_value):
            return False
        # Compare *every* item positionally, not just the first one, so a
        # mismatch later in the list is not silently ignored. zip() stops
        # at the shorter (subset) list, so extra superset items are allowed.
        return all(
            _is_deep_subset(item, superset_item)
            for item, superset_item in zip(value, superset_value)
        )

    # For all other types, values must be strictly equal
    return value == superset_value

Let’s see it in action with a superset representing a desired application configuration and a subset representing current settings or a partial update:

# The reference configuration: every candidate below is tested against it
superset_config = {
    "app_name": "MyAwesomeApp",
    "version": "1.0.0",
    "settings": {
        "debug_mode": True,
        "logging_level": "INFO",
        "features": {"email_notifications": True, "slack_integration": False},
    },
    "users": [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}],
    "database": {"host": "localhost", "port": 5432},
}

# Candidate 1: a partial configuration mixing a top-level value with
# nested dictionaries that carry fewer keys than the reference
subset_check_1 = {
    "app_name": "MyAwesomeApp",
    "settings": {"debug_mode": True},
    "database": {"host": "localhost"},
}

# Candidate 2: a deeply nested dictionary plus a list; the single user
# entry is checked against the first user in superset_config["users"]
subset_check_2 = {
    "settings": {"features": {"email_notifications": True}},
    "users": [{"id": 1}],
}

# Candidate 3 (expected to fail): superset_config has no "environment" key
subset_check_3_fail = {"environment": "production"}

# Candidate 4 (expected to fail): "debug_mode" is True in superset_config
subset_check_4_fail = {"settings": {"debug_mode": False}}

# Candidate 5 (expected to fail): the first user is "Alice", not "Eve"
subset_check_5_fail = {"users": [{"id": 1, "name": "Eve"}]}

# Check each candidate in turn; one result is printed per line
for candidate in (
    subset_check_1,       # True
    subset_check_2,       # True
    subset_check_3_fail,  # False (key missing)
    subset_check_4_fail,  # False (value mismatch)
    subset_check_5_fail,  # False (nested list item mismatch)
):
    print(is_subset_dict(candidate, superset_config))

Reply to this post by email ↪