Deep comparison of Python dictionaries
Imagine you have configuration data, API payloads, or test expectations, and you want to verify that a smaller, “subset” dictionary is fully contained within a larger “superset” dictionary, including all the nested dictionaries and lists that it may contain.
Python’s built-in tools like dict.items() and set operations are fantastic for
shallow comparisons.
There are some built-in approaches that spring to mind for this problem, and they’re perfect for flat, simple dictionaries:
subset.items() <= superset.items()
all(item in superset.items() for item in subset.items())
While useful for simple dictionaries, they only check the top level. If a value
is another dictionary, the two inner dictionaries are compared for full equality
rather than checked for containment. For example, { 'a': {'b': 1} } is not a subset of { 'a': {'b': 1, 'c': 2} } using these methods, because the inner dictionaries {'b': 1} and {'b': 1, 'c': 2} are not
equal.
These methods are perfect for flat dictionaries but fall flat when dealing with nested data.
from typing import Any
def is_subset_dict(subset: dict[str, Any], superset: dict[str, Any]) -> bool:
    """Return True if 'subset' is a deep subset of 'superset'.

    Every key in 'subset' must exist in 'superset', and each pair of values
    must match under these rules:

    * two dictionaries: the subset value must itself be a deep subset;
    * two lists: the subset list must not be longer than the superset list,
      and every item must match the superset item at the same index
      (extra trailing items in the superset list are ignored);
    * anything else: the values must compare equal with ``==``.

    If either top-level argument is not a dictionary, the result is simply
    ``subset == superset``.
    """
    if not isinstance(subset, dict) or not isinstance(superset, dict):
        # If not both dictionaries, they must be strictly equal
        return subset == superset
    return _is_deep_subset(subset, superset)


def _is_deep_subset(value: Any, superset_value: Any) -> bool:
    """Apply the dict / list / equality matching rules to one pair of values."""
    if isinstance(value, dict) and isinstance(superset_value, dict):
        # Every key must be present and its value must match recursively.
        return all(
            key in superset_value and _is_deep_subset(item, superset_value[key])
            for key, item in value.items()
        )
    if isinstance(value, list) and isinstance(superset_value, list):
        if len(value) > len(superset_value):
            return False
        # Positional comparison of *every* item, not just the first one.
        # zip() stops at the shorter (subset) list, so an empty subset list
        # matches any superset list.
        return all(
            _is_deep_subset(item, superset_item)
            for item, superset_item in zip(value, superset_value)
        )
    # Mismatched container types and all scalar values: strict equality.
    return value == superset_value


# Let’s see it in action with a superset representing a desired application
# configuration and a subset representing current settings or a partial update:
# Reference configuration: the complete "superset" that every candidate
# below is checked against.
superset_config = {
    "app_name": "MyAwesomeApp",
    "version": "1.0.0",
    "settings": {
        "debug_mode": True,
        "logging_level": "INFO",
        "features": {
            "email_notifications": True,
            "slack_integration": False,
        },
    },
    "users": [
        {"id": 1, "name": "Alice"},
        {"id": 2, "name": "Bob"},
    ],
    "database": {
        "host": "localhost",
        "port": 5432,
    },
}

# Candidate 1: a partial configuration whose nested dictionaries carry
# fewer keys than their counterparts in superset_config.
subset_check_1 = {
    "app_name": "MyAwesomeApp",
    "settings": {"debug_mode": True},
    "database": {"host": "localhost"},
}

# Candidate 2: a deeply nested key plus a list. The single list item is
# compared with the first user in superset_config["users"], and {"id": 1}
# is contained in that user dictionary.
subset_check_2 = {
    "settings": {
        "features": {"email_notifications": True},
    },
    "users": [
        {"id": 1},
    ],
}

# Candidate 3 (expected to fail): superset_config has no "environment" key.
subset_check_3_fail = {"environment": "production"}

# Candidate 4 (expected to fail): "debug_mode" is True in superset_config.
subset_check_4_fail = {
    "settings": {"debug_mode": False},
}

# Candidate 5 (expected to fail): the first user is named "Alice", not "Eve".
subset_check_5_fail = {
    "users": [
        {"id": 1, "name": "Eve"},
    ],
}

# Print one result per line, in candidate order. Expected output:
#   True
#   True
#   False  (key missing)
#   False  (value mismatch)
#   False  (nested list item mismatch)
print(
    *(
        is_subset_dict(candidate, superset_config)
        for candidate in (
            subset_check_1,
            subset_check_2,
            subset_check_3_fail,
            subset_check_4_fail,
            subset_check_5_fail,
        )
    ),
    sep="\n",
)