From 64e9a603f8272c1e190e9884df0cc96c55ea174e Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Thu, 22 Jan 2026 09:31:13 -0800 Subject: [PATCH 1/3] add docs and refactor --- pyiceberg/catalog/hive.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index 3046e25626..e161f0a81e 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -551,23 +551,30 @@ def commit_table( if hive_table and current_table: # Table exists, update it. - new_parameters = _construct_parameters( + + # Note on table properties: + # - Iceberg table properties are stored in both HMS and Iceberg metadata JSON. + # - Updates are reflected in both locations + # - Existing HMS table properties (set by external systems like Hive/Spark) are preserved. + # + # While it is possible to modify HMS table properties through this API, it is not recommended: + # - New/Updated HMS table properties will also be stored in Iceberg metadata (even though it's HMS-specific) + # - HMS properties cannot be deleted since they are not visible to Iceberg + # - Mixing HMS-specific properties in Iceberg metadata can cause confusion + new_iceberg_properties = _construct_parameters( metadata_location=updated_staged_table.metadata_location, previous_metadata_location=current_table.metadata_location, metadata_properties=updated_staged_table.properties, ) - # Detect properties that were removed from Iceberg metadata - removed_keys = current_table.properties.keys() - updated_staged_table.properties.keys() - - # Sync HMS parameters: Iceberg metadata is the source of truth, HMS parameters are - # a projection of Iceberg state plus any HMS-only properties. - # Start with existing HMS params, remove deleted Iceberg properties, then apply Iceberg values. - merged_params = dict(hive_table.parameters or {}) - for key in removed_keys: - merged_params.pop(key, None) - merged_params.update(new_parameters) - hive_table.parameters = merged_params + deleted_iceberg_properties = current_table.properties.keys() - updated_staged_table.properties.keys() + + # Merge: preserve HMS-only properties, remove deleted Iceberg properties, apply new Iceberg properties + existing_hms_parameters = dict(hive_table.parameters or {}) + for key in deleted_iceberg_properties: + existing_hms_parameters.pop(key, None) + existing_hms_parameters.update(new_iceberg_properties) + hive_table.parameters = existing_hms_parameters # Update hive's schema and properties hive_table.sd = _construct_hive_storage_descriptor( From c252fb78f52e531400f259a118acc7edac8d2787 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 23 Jan 2026 09:30:49 -0800 Subject: [PATCH 2/3] Update pyiceberg/catalog/hive.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- pyiceberg/catalog/hive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index e161f0a81e..2a52324d54 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -558,7 +558,7 @@ def commit_table( # - Existing HMS table properties (set by external systems like Hive/Spark) are preserved. # # While it is possible to modify HMS table properties through this API, it is not recommended: - # - New/Updated HMS table properties will also be stored in Iceberg metadata (even though it's HMS-specific) + # - New/Updated HMS table properties will also be stored in Iceberg metadata (even though it is HMS-specific) # - HMS properties cannot be deleted since they are not visible to Iceberg # - Mixing HMS-specific properties in Iceberg metadata can cause confusion new_iceberg_properties = _construct_parameters( From 6a928209bdd93f2d561c34e4f23ffc4241008558 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 23 Jan 2026 12:37:52 -0500 Subject: [PATCH 3/3] feedback --- pyiceberg/catalog/hive.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index 2a52324d54..1bec186ca8 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -558,9 +558,11 @@ def commit_table( # - Existing HMS table properties (set by external systems like Hive/Spark) are preserved. # # While it is possible to modify HMS table properties through this API, it is not recommended: - # - New/Updated HMS table properties will also be stored in Iceberg metadata (even though it is HMS-specific) - # - HMS properties cannot be deleted since they are not visible to Iceberg # - Mixing HMS-specific properties in Iceberg metadata can cause confusion + # - New/updated HMS table properties will also be stored in Iceberg metadata (even though it is HMS-specific) + # - HMS-native properties (set outside Iceberg) cannot be deleted since they are not visible to Iceberg + # (However, if you first SET an HMS property via Iceberg, it becomes tracked in Iceberg metadata, + # and can then be deleted via Iceberg - which removes it from both Iceberg metadata and HMS) new_iceberg_properties = _construct_parameters( metadata_location=updated_staged_table.metadata_location, previous_metadata_location=current_table.metadata_location, @@ -569,7 +571,7 @@ def commit_table( # Detect properties that were removed from Iceberg metadata deleted_iceberg_properties = current_table.properties.keys() - updated_staged_table.properties.keys() - # Merge: preserve HMS-only properties, remove deleted Iceberg properties, apply new Iceberg properties + # Merge: preserve HMS-native properties, remove deleted Iceberg properties, apply new Iceberg properties existing_hms_parameters = dict(hive_table.parameters or {}) for key in deleted_iceberg_properties: existing_hms_parameters.pop(key, None)