From 04b252eb3bedab8033768019cd09429d0c07ed6a Mon Sep 17 00:00:00 2001 From: echonesis Date: Wed, 28 Jan 2026 15:56:13 +0800 Subject: [PATCH 1/2] HDDS-13572. SCMSnapshotProvider should not auto-create Ratis directory --- .../hadoop/hdds/scm/ha/SCMHAManagerImpl.java | 27 +- .../hdds/scm/ha/SCMSnapshotProvider.java | 99 ++++++- .../scm/server/StorageContainerManager.java | 17 ++ .../hdds/scm/ha/TestSCMSnapshotProvider.java | 247 ++++++++++++++++++ .../upgrade/TestScmStartupSlvLessThanMlv.java | 7 + .../ozonesecure-ha/test-repair-tools.sh | 9 + .../main/smoketest/repair/om-compact.robot | 17 +- 7 files changed, 406 insertions(+), 17 deletions(-) create mode 100644 hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMSnapshotProvider.java diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAManagerImpl.java index 0cc600160e11..cb3e66c7e826 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAManagerImpl.java @@ -95,10 +95,31 @@ public SCMHAManagerImpl(final ConfigurationSource conf, } @VisibleForTesting - protected SCMSnapshotProvider newScmSnapshotProvider(StorageContainerManager storageContainerManager) { - return new SCMSnapshotProvider(storageContainerManager.getConfiguration(), + protected SCMSnapshotProvider newScmSnapshotProvider( + StorageContainerManager storageContainerManager) { + + // Determine startup option based on snapshot directory existence + String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory( + storageContainerManager.getConfiguration()); + File snapshotDirFile = new File(snapshotDir); + + SCMSnapshotProvider.StartupOption startupOption; + if (snapshotDirFile.exists()) { + // Snapshot directory exists -> use NORMAL mode + startupOption = SCMSnapshotProvider.StartupOption.NORMAL; + } else { + // Snapshot directory does not exist -> use FORMAT mode + startupOption = SCMSnapshotProvider.StartupOption.FORMAT; + } + + LOG.info("Creating SCMSnapshotProvider with {} mode. Snapshot directory: {}", + startupOption, snapshotDir); + + return new SCMSnapshotProvider( + storageContainerManager.getConfiguration(), storageContainerManager.getSCMHANodeDetails().getPeerNodeDetails(), - storageContainerManager.getScmCertificateClient()); + storageContainerManager.getScmCertificateClient(), + startupOption); } /** diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMSnapshotProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMSnapshotProvider.java index ac960d5c1c7e..57647133c8c0 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMSnapshotProvider.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMSnapshotProvider.java @@ -55,24 +55,111 @@ public class SCMSnapshotProvider { private final CertificateClient scmCertificateClient; + /** + * Startup options for SCM snapshot provider. + */ + public enum StartupOption { + /** + * FORMAT mode: Ratis snapshot directory should not exist. + * Will create and initialize a new Ratis snapshot directory. + */ + FORMAT, + /** + * NORMAL mode: Ratis snapshot directory should already exist. + * Will read from existing Ratis snapshot directory. + */ + NORMAL + } + + /** + * Creates SCMSnapshotProvider with default NORMAL startup option. + * This constructor is used in most scenarios where the Ratis snapshot + * directory is expected to already exist. + * + * @param conf Configuration source + * @param peerNodes List of peer SCM nodes + * @param scmCertificateClient Certificate client for secure communication + */ public SCMSnapshotProvider(ConfigurationSource conf, List peerNodes, CertificateClient scmCertificateClient) { - LOG.info("Initializing SCM Snapshot Provider"); + this(conf, peerNodes, scmCertificateClient, StartupOption.NORMAL); + } + + /** + * Creates SCMSnapshotProvider with specified startup option. + * + * @param conf Configuration source + * @param peerNodes List of peer SCM nodes + * @param scmCertificateClient Certificate client for secure communication + * @param startupOption Startup mode: FORMAT (create new) or NORMAL (use existing) + */ + public SCMSnapshotProvider(ConfigurationSource conf, + List peerNodes, + CertificateClient scmCertificateClient, + StartupOption startupOption) { + LOG.info("Initializing SCM Snapshot Provider with startup option: {}", + startupOption); this.conf = conf; this.scmCertificateClient = scmCertificateClient; - // Create Ratis storage dir + + // Get directory paths from configuration String scmRatisDirectory = SCMHAUtils.getSCMRatisDirectory(conf); + String scmSnapshotDirectory = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); if (scmRatisDirectory == null || scmRatisDirectory.isEmpty()) { throw new IllegalArgumentException(HddsConfigKeys.OZONE_METADATA_DIRS + " must be defined."); } - HddsUtils.createDir(scmRatisDirectory); - // Create Ratis snapshot dir - scmSnapshotDir = HddsUtils.createDir( - SCMHAUtils.getSCMRatisSnapshotDirectory(conf)); + if (scmSnapshotDirectory == null || scmSnapshotDirectory.isEmpty()) { + throw new IllegalArgumentException("SCM Ratis snapshot directory must be defined."); + } + + File ratisDir = new File(scmRatisDirectory); + File snapshotDir = new File(scmSnapshotDirectory); + + // Ratis storage directory should already be created by SCMRatisServerImpl + // or by init/bootstrap commands. SCMSnapshotProvider is NOT responsible for creating it. + if (!ratisDir.exists()) { + throw new IllegalStateException( + "Ratis storage directory does not exist: " + ratisDir.getAbsolutePath() + + ". It should have been created by SCMRatisServerImpl.initialize()."); + } + + // Handle snapshot directory based on startup option + switch (startupOption) { + case FORMAT: + // FORMAT mode: snapshot directory should NOT exist + if (snapshotDir.exists()) { + throw new IllegalStateException( + "Cannot format: Ratis snapshot directory already exists: " + + snapshotDir.getAbsolutePath()); + } + // Create new snapshot directory + this.scmSnapshotDir = HddsUtils.createDir(scmSnapshotDirectory); + LOG.info("Formatted: created new Ratis snapshot directory at {}", + this.scmSnapshotDir.getAbsolutePath()); + break; + + case NORMAL: + // NORMAL mode: snapshot directory MUST exist + if (!snapshotDir.exists()) { + throw new IllegalStateException( + "Ratis snapshot directory does not exist: " + + snapshotDir.getAbsolutePath() + + ". Please run 'ozone scm --init' first."); + } + this.scmSnapshotDir = snapshotDir; + LOG.info("Using existing Ratis snapshot directory at {}", + this.scmSnapshotDir.getAbsolutePath()); + break; + + default: + throw new IllegalArgumentException("Unknown startup option: " + startupOption); + } + + // Initialize peer nodes map if (peerNodes != null) { this.peerNodesMap = new HashMap<>(); for (SCMNodeDetails peerNode : peerNodes) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index bb6d59e0d054..def3dbb7ad62 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -1210,6 +1210,16 @@ public static boolean scmBootstrap(OzoneConfiguration conf) scmStorageConfig.setPrimaryScmNodeId(scmInfo.getScmId()); scmStorageConfig.setSCMHAFlag(true); scmStorageConfig.initialize(); + + // Create Ratis storage and snapshot directories during bootstrap + // This ensures they exist before the bootstrapped SCM starts normally + String ratisStorageDir = SCMHAUtils.getSCMRatisDirectory(conf); + String ratisSnapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); + HddsUtils.createDir(ratisStorageDir); + HddsUtils.createDir(ratisSnapshotDir); + LOG.info("Created Ratis storage directory: {}", ratisStorageDir); + LOG.info("Created Ratis snapshot directory: {}", ratisSnapshotDir); + LOG.info("SCM BootStrap is successful for ClusterID {}, SCMID {}", scmInfo.getClusterId(), scmStorageConfig.getScmId()); LOG.info("Primary SCM Node ID {}", @@ -1340,6 +1350,13 @@ private static SCMStorageConfig initializeRatis(OzoneConfiguration conf) final SCMHANodeDetails haDetails = SCMHANodeDetails.loadSCMHAConfig(conf, storageConfig); SCMRatisServerImpl.initialize(storageConfig.getClusterID(), storageConfig.getScmId(), haDetails.getLocalNodeDetails(), conf); + + // Create Ratis snapshot directory during initialization + // This ensures the directory exists before SCM starts normally + String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); + HddsUtils.createDir(snapshotDir); + LOG.info("Created Ratis snapshot directory: {}", snapshotDir); + storageConfig.setSCMHAFlag(true); storageConfig.setPrimaryScmNodeId(storageConfig.getScmId()); storageConfig.forceInitialize(); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMSnapshotProvider.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMSnapshotProvider.java new file mode 100644 index 000000000000..bedb308d9cbf --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMSnapshotProvider.java @@ -0,0 +1,247 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.ha; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; +import org.apache.hadoop.hdds.HddsConfigKeys; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +/** + * Test cases for {@link SCMSnapshotProvider} with different startup modes. + */ +class TestSCMSnapshotProvider { + + /** + * Test 1: Default constructor (no startup option parameter). + * Should use NORMAL mode by default. + * Expects both Ratis storage and snapshot directories to exist. + */ + @Test + void testDefaultConstructor(@TempDir Path tempDir) throws IOException { + // Setup: Create both directories (simulating initialized SCM) + OzoneConfiguration conf = createConfig(tempDir); + String ratisDir = SCMHAUtils.getSCMRatisDirectory(conf); + String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); + + assertTrue(new File(ratisDir).mkdirs()); + assertTrue(new File(snapshotDir).mkdirs()); + + CertificateClient certClient = mock(CertificateClient.class); + + // Test: Use default constructor (should use NORMAL mode) + SCMSnapshotProvider provider = new SCMSnapshotProvider( + conf, null, certClient); + + // Verify: Provider created successfully + assertNotNull(provider); + assertNotNull(provider.getScmSnapshotDir()); + assertTrue(provider.getScmSnapshotDir().exists()); + assertEquals(snapshotDir, provider.getScmSnapshotDir().getAbsolutePath()); + } + + /** + * Test 2: Default constructor when snapshot directory does not exist. + * Should fail because NORMAL mode expects directory to exist. + */ + @Test + void testDefaultConstructorFailsWhenSnapshotDirMissing(@TempDir Path tempDir) { + // Setup: Create only Ratis storage directory, not snapshot directory + OzoneConfiguration conf = createConfig(tempDir); + String ratisDir = SCMHAUtils.getSCMRatisDirectory(conf); + assertTrue(new File(ratisDir).mkdirs()); + + CertificateClient certClient = mock(CertificateClient.class); + + // Test & Verify: Should throw exception + IllegalStateException exception = assertThrows( + IllegalStateException.class, + () -> new SCMSnapshotProvider(conf, null, certClient)); + + assertTrue(exception.getMessage().contains("Ratis snapshot directory does not exist")); + } + + /** + * Test 3: Explicit NORMAL mode with existing directories. + * Should succeed when both directories exist. + */ + @Test + void testNormalModeWithExistingDirectories(@TempDir Path tempDir) throws IOException { + // Setup: Create both directories + OzoneConfiguration conf = createConfig(tempDir); + String ratisDir = SCMHAUtils.getSCMRatisDirectory(conf); + String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); + + assertTrue(new File(ratisDir).mkdirs()); + assertTrue(new File(snapshotDir).mkdirs()); + + CertificateClient certClient = mock(CertificateClient.class); + + // Test: Explicitly use NORMAL mode + SCMSnapshotProvider provider = new SCMSnapshotProvider( + conf, null, certClient, SCMSnapshotProvider.StartupOption.NORMAL); + + // Verify: Provider created successfully + assertNotNull(provider); + assertNotNull(provider.getScmSnapshotDir()); + assertTrue(provider.getScmSnapshotDir().exists()); + } + + /** + * Test 4: Explicit NORMAL mode when snapshot directory missing. + * Should fail with clear error message. + */ + @Test + void testNormalModeFailsWhenSnapshotDirMissing(@TempDir Path tempDir) { + // Setup: Create only Ratis storage directory + OzoneConfiguration conf = createConfig(tempDir); + String ratisDir = SCMHAUtils.getSCMRatisDirectory(conf); + assertTrue(new File(ratisDir).mkdirs()); + + CertificateClient certClient = mock(CertificateClient.class); + + // Test & Verify: Should throw exception + IllegalStateException exception = assertThrows( + IllegalStateException.class, + () -> new SCMSnapshotProvider( + conf, null, certClient, SCMSnapshotProvider.StartupOption.NORMAL)); + + assertTrue(exception.getMessage().contains("Ratis snapshot directory does not exist")); + assertTrue(exception.getMessage().contains("Please run 'ozone scm --init' first")); + } + + /** + * Test 5: Explicit NORMAL mode when Ratis storage directory missing. + * Should fail because SCMRatisServerImpl should have created it. + */ + @Test + void testNormalModeFailsWhenRatisDirMissing(@TempDir Path tempDir) { + // Setup: Don't create any directories + OzoneConfiguration conf = createConfig(tempDir); + CertificateClient certClient = mock(CertificateClient.class); + + // Test & Verify: Should throw exception + IllegalStateException exception = assertThrows( + IllegalStateException.class, + () -> new SCMSnapshotProvider( + conf, null, certClient, SCMSnapshotProvider.StartupOption.NORMAL)); + + assertTrue(exception.getMessage().contains("Ratis storage directory does not exist")); + assertTrue(exception.getMessage().contains("SCMRatisServerImpl.initialize()")); + } + + /** + * Test 6: FORMAT mode creates new snapshot directory. + * Should succeed when snapshot directory does not exist. + */ + @Test + void testFormatModeCreatesNewDirectory(@TempDir Path tempDir) throws IOException { + // Setup: Create only Ratis storage directory (as SCMRatisServerImpl would) + OzoneConfiguration conf = createConfig(tempDir); + String ratisDir = SCMHAUtils.getSCMRatisDirectory(conf); + String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); + + assertTrue(new File(ratisDir).mkdirs()); + // Snapshot directory should NOT exist for FORMAT mode + + CertificateClient certClient = mock(CertificateClient.class); + + // Test: Use FORMAT mode + SCMSnapshotProvider provider = new SCMSnapshotProvider( + conf, null, certClient, SCMSnapshotProvider.StartupOption.FORMAT); + + // Verify: Provider created and snapshot directory was created + assertNotNull(provider); + assertNotNull(provider.getScmSnapshotDir()); + assertTrue(provider.getScmSnapshotDir().exists()); + assertEquals(snapshotDir, provider.getScmSnapshotDir().getAbsolutePath()); + } + + /** + * Test 7: FORMAT mode fails when snapshot directory already exists. + * Should prevent accidental data loss. + */ + @Test + void testFormatModeFailsWhenSnapshotDirExists(@TempDir Path tempDir) { + // Setup: Create both directories (simulating existing SCM) + OzoneConfiguration conf = createConfig(tempDir); + String ratisDir = SCMHAUtils.getSCMRatisDirectory(conf); + String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); + + assertTrue(new File(ratisDir).mkdirs()); + assertTrue(new File(snapshotDir).mkdirs()); // This exists! + + CertificateClient certClient = mock(CertificateClient.class); + + // Test & Verify: FORMAT should fail when directory exists + IllegalStateException exception = assertThrows( + IllegalStateException.class, + () -> new SCMSnapshotProvider( + conf, null, certClient, SCMSnapshotProvider.StartupOption.FORMAT)); + + assertTrue(exception.getMessage().contains("Cannot format")); + assertTrue(exception.getMessage().contains("already exists")); + } + + /** + * Test 8: FORMAT mode fails when Ratis storage directory missing. + * Even in FORMAT mode, Ratis storage dir should exist (created by SCMRatisServerImpl). + */ + @Test + void testFormatModeFailsWhenRatisDirMissing(@TempDir Path tempDir) { + // Setup: Don't create Ratis storage directory + OzoneConfiguration conf = createConfig(tempDir); + CertificateClient certClient = mock(CertificateClient.class); + + // Test & Verify: Should throw exception + IllegalStateException exception = assertThrows( + IllegalStateException.class, + () -> new SCMSnapshotProvider( + conf, null, certClient, SCMSnapshotProvider.StartupOption.FORMAT)); + + assertTrue(exception.getMessage().contains("Ratis storage directory does not exist")); + } + + /** + * Helper method to create OzoneConfiguration with temp directories. + */ + private OzoneConfiguration createConfig(Path tempDir) { + OzoneConfiguration conf = new OzoneConfiguration(); + + String metadataDir = tempDir.resolve("metadata").toString(); + String ratisStorageDir = tempDir.resolve("ratis").toString(); + String ratisSnapshotDir = tempDir.resolve("ratis-snapshot").toString(); + + conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, metadataDir); + conf.set(ScmConfigKeys.OZONE_SCM_HA_RATIS_STORAGE_DIR, ratisStorageDir); + conf.set(ScmConfigKeys.OZONE_SCM_HA_RATIS_SNAPSHOT_DIR, ratisSnapshotDir); + + return conf; + } +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestScmStartupSlvLessThanMlv.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestScmStartupSlvLessThanMlv.java index 984306fb8a42..25de21766947 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestScmStartupSlvLessThanMlv.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestScmStartupSlvLessThanMlv.java @@ -53,6 +53,13 @@ public void testStartupSlvLessThanMlv(@TempDir Path tempDir) File scmSubdir = tempDir.resolve("scm").resolve("current").toFile(); assertTrue(scmSubdir.mkdirs()); + // Create Ratis directories to simulate a realistic downgrade scenario + // where SCM was previously running with a newer version + File ratisDir = tempDir.resolve("scm.ratis").toFile(); + File snapshotDir = tempDir.resolve("scm.ratis.snapshot").toFile(); + assertTrue(ratisDir.mkdirs()); + assertTrue(snapshotDir.mkdirs()); + OzoneConfiguration conf = new OzoneConfiguration(); conf.set(ScmConfigKeys.OZONE_SCM_DB_DIRS, tempDir.toAbsolutePath().toString()); diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-repair-tools.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-repair-tools.sh index ca6fa5a0cbd8..df92e93c59cc 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-repair-tools.sh +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-repair-tools.sh @@ -98,5 +98,14 @@ echo "Test keys created" echo "Restarting OM after key creation to flush and generate sst files" docker restart "${om_container}" +wait_for_om_leader + +echo "Deleting test keys to create tombstones" +docker exec "${om_container}" bash -c "kinit -kt /etc/security/keytabs/testuser.keytab testuser/om@EXAMPLE.COM && ozone fs -rm -R -skipTrash ofs://omservice/vol1/bucket1" +echo "Test keys deleted" + +echo "Restarting OM after key deletion to flush tombstones to sst files" +docker restart "${om_container}" +wait_for_om_leader execute_robot_test ${OM} repair/om-compact.robot diff --git a/hadoop-ozone/dist/src/main/smoketest/repair/om-compact.robot b/hadoop-ozone/dist/src/main/smoketest/repair/om-compact.robot index 164cf410dcbc..782304ed40f7 100644 --- a/hadoop-ozone/dist/src/main/smoketest/repair/om-compact.robot +++ b/hadoop-ozone/dist/src/main/smoketest/repair/om-compact.robot @@ -36,19 +36,20 @@ Compact OM DB Column Family [Arguments] ${column_family} Execute ozone repair om compact --cf=${column_family} --service-id ${OM_SERVICE_ID} --node-id om1 +SST Size Should Be Reduced + [Arguments] ${original_size} + ${current_size} = Get OM DB SST Files Size + Should Be True ${current_size} < ${original_size} + ... OM DB size should be reduced after compaction. Before: ${original_size}, After: ${current_size} + *** Test Cases *** Testing OM DB Size Reduction After Compaction - # Test keys are already created and flushed - # Delete keys to create tombstones that need compaction - Delete Test Keys - + # Test keys are already created, deleted and flushed by the test script ${size_before_compaction} = Get OM DB SST Files Size Compact OM DB Column Family fileTable Compact OM DB Column Family deletedTable Compact OM DB Column Family deletedDirectoryTable - - ${size_after_compaction} = Get OM DB SST Files Size - Should Be True ${size_after_compaction} < ${size_before_compaction} - ... OM DB size should be reduced after compaction. Before: ${size_before_compaction}, After: ${size_after_compaction} + # Compaction is asynchronous, poll until size is reduced + Wait Until Keyword Succeeds 60sec 5sec SST Size Should Be Reduced ${size_before_compaction} From b4411ad4fb28243d00f545a78ae8b291f2559c7f Mon Sep 17 00:00:00 2001 From: echonesis Date: Thu, 5 Feb 2026 16:01:17 +0800 Subject: [PATCH 2/2] fix: CR update --- .../hadoop/hdds/scm/ha/SCMHAManagerImpl.java | 21 +-- .../hdds/scm/ha/SCMSnapshotProvider.java | 93 ++--------- .../scm/server/StorageContainerManager.java | 32 +++- .../hdds/scm/ha/TestSCMSnapshotProvider.java | 152 ++---------------- .../ozonesecure-ha/test-repair-tools.sh | 9 -- .../main/smoketest/repair/om-compact.robot | 17 +- .../hdds/scm/TestStorageContainerManager.java | 15 +- .../hadoop/ozone/TestSecureOzoneCluster.java | 17 +- .../hadoop/ozone/MiniOzoneClusterImpl.java | 5 + 9 files changed, 73 insertions(+), 288 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAManagerImpl.java index cb3e66c7e826..d9fffeec974c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAManagerImpl.java @@ -97,29 +97,10 @@ public SCMHAManagerImpl(final ConfigurationSource conf, @VisibleForTesting protected SCMSnapshotProvider newScmSnapshotProvider( StorageContainerManager storageContainerManager) { - - // Determine startup option based on snapshot directory existence - String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory( - storageContainerManager.getConfiguration()); - File snapshotDirFile = new File(snapshotDir); - - SCMSnapshotProvider.StartupOption startupOption; - if (snapshotDirFile.exists()) { - // Snapshot directory exists -> use NORMAL mode - startupOption = SCMSnapshotProvider.StartupOption.NORMAL; - } else { - // Snapshot directory does not exist -> use FORMAT mode - startupOption = SCMSnapshotProvider.StartupOption.FORMAT; - } - - LOG.info("Creating SCMSnapshotProvider with {} mode. Snapshot directory: {}", - startupOption, snapshotDir); - return new SCMSnapshotProvider( storageContainerManager.getConfiguration(), storageContainerManager.getSCMHANodeDetails().getPeerNodeDetails(), - storageContainerManager.getScmCertificateClient(), - startupOption); + storageContainerManager.getScmCertificateClient()); } /** diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMSnapshotProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMSnapshotProvider.java index 57647133c8c0..17f8259d76b0 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMSnapshotProvider.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMSnapshotProvider.java @@ -20,6 +20,7 @@ import com.google.common.annotations.VisibleForTesting; import java.io.File; import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.HashMap; @@ -29,7 +30,6 @@ import org.apache.commons.io.FileUtils; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdds.HddsConfigKeys; -import org.apache.hadoop.hdds.HddsUtils; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; import org.apache.hadoop.hdds.utils.db.DBCheckpoint; @@ -55,51 +55,10 @@ public class SCMSnapshotProvider { private final CertificateClient scmCertificateClient; - /** - * Startup options for SCM snapshot provider. - */ - public enum StartupOption { - /** - * FORMAT mode: Ratis snapshot directory should not exist. - * Will create and initialize a new Ratis snapshot directory. - */ - FORMAT, - /** - * NORMAL mode: Ratis snapshot directory should already exist. - * Will read from existing Ratis snapshot directory. - */ - NORMAL - } - - /** - * Creates SCMSnapshotProvider with default NORMAL startup option. - * This constructor is used in most scenarios where the Ratis snapshot - * directory is expected to already exist. - * - * @param conf Configuration source - * @param peerNodes List of peer SCM nodes - * @param scmCertificateClient Certificate client for secure communication - */ public SCMSnapshotProvider(ConfigurationSource conf, List peerNodes, CertificateClient scmCertificateClient) { - this(conf, peerNodes, scmCertificateClient, StartupOption.NORMAL); - } - - /** - * Creates SCMSnapshotProvider with specified startup option. - * - * @param conf Configuration source - * @param peerNodes List of peer SCM nodes - * @param scmCertificateClient Certificate client for secure communication - * @param startupOption Startup mode: FORMAT (create new) or NORMAL (use existing) - */ - public SCMSnapshotProvider(ConfigurationSource conf, - List peerNodes, - CertificateClient scmCertificateClient, - StartupOption startupOption) { - LOG.info("Initializing SCM Snapshot Provider with startup option: {}", - startupOption); + LOG.info("Initializing SCM Snapshot Provider"); this.conf = conf; this.scmCertificateClient = scmCertificateClient; @@ -116,48 +75,24 @@ public SCMSnapshotProvider(ConfigurationSource conf, throw new IllegalArgumentException("SCM Ratis snapshot directory must be defined."); } - File ratisDir = new File(scmRatisDirectory); - File snapshotDir = new File(scmSnapshotDirectory); - // Ratis storage directory should already be created by SCMRatisServerImpl // or by init/bootstrap commands. SCMSnapshotProvider is NOT responsible for creating it. - if (!ratisDir.exists()) { + Path ratisPath = Paths.get(scmRatisDirectory); + if (!Files.isDirectory(ratisPath)) { throw new IllegalStateException( - "Ratis storage directory does not exist: " + ratisDir.getAbsolutePath() - + ". It should have been created by SCMRatisServerImpl.initialize()."); + "Ratis storage directory does not exist: " + scmRatisDirectory + + ". Please run 'ozone scm --init' or 'ozone scm --bootstrap' first."); } - // Handle snapshot directory based on startup option - switch (startupOption) { - case FORMAT: - // FORMAT mode: snapshot directory should NOT exist - if (snapshotDir.exists()) { - throw new IllegalStateException( - "Cannot format: Ratis snapshot directory already exists: " - + snapshotDir.getAbsolutePath()); - } - // Create new snapshot directory - this.scmSnapshotDir = HddsUtils.createDir(scmSnapshotDirectory); - LOG.info("Formatted: created new Ratis snapshot directory at {}", - this.scmSnapshotDir.getAbsolutePath()); - break; - - case NORMAL: - // NORMAL mode: snapshot directory MUST exist - if (!snapshotDir.exists()) { - throw new IllegalStateException( - "Ratis snapshot directory does not exist: " - + snapshotDir.getAbsolutePath() - + ". Please run 'ozone scm --init' first."); - } - this.scmSnapshotDir = snapshotDir; - LOG.info("Using existing Ratis snapshot directory at {}", - this.scmSnapshotDir.getAbsolutePath()); - break; - - default: - throw new IllegalArgumentException("Unknown startup option: " + startupOption); + // Ratis snapshot directory should already exist (created by --init or --bootstrap) + Path snapshotPath = Paths.get(scmSnapshotDirectory); + if (!Files.isDirectory(snapshotPath)) { + throw new IllegalStateException( + "Ratis snapshot directory does not exist: " + scmSnapshotDirectory + + ". Please run 'ozone scm --init' or 'ozone scm --bootstrap' first."); } + this.scmSnapshotDir = snapshotPath.toFile(); + LOG.info("Using Ratis snapshot directory at {}", scmSnapshotDirectory); // Initialize peer nodes map if (peerNodes != null) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index def3dbb7ad62..e8eaa21eb4f1 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -32,6 +32,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.protobuf.BlockingService; +import java.io.File; import java.io.IOException; import java.math.BigInteger; import java.net.InetAddress; @@ -374,8 +375,25 @@ private StorageContainerManager(OzoneConfiguration conf, // This is for the clusters which got upgraded from older version of Ozone. // We enable Ratis by default. if (!scmStorageConfig.isSCMHAEnabled()) { + // Create Ratis snapshot directory for upgrade scenario. + // This must be done before initializeRatis() since SCMSnapshotProvider + // expects the directory to already exist during normal startup. + String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); + HddsUtils.createDir(snapshotDir); + LOG.info("Upgrade: created Ratis snapshot directory: {}", snapshotDir); + // Since we have initialized Ratis, we have to reload StorageConfig scmStorageConfig = initializeRatis(conf); + } else { + // For clusters upgraded from older versions that already have SCMHAEnabled + // but may not have the snapshot directory (it was auto-created by + // SCMSnapshotProvider in older versions), ensure the directory exists. + String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); + File snapshotDirFile = new File(snapshotDir); + if (!snapshotDirFile.exists()) { + HddsUtils.createDir(snapshotDir); + LOG.info("Created missing Ratis snapshot directory for upgraded cluster: {}", snapshotDir); + } } threadNamePrefix = getScmNodeDetails().threadNamePrefix(); @@ -1300,6 +1318,14 @@ public static boolean scmInit(OzoneConfiguration conf, scmStorageConfig.setPrimaryScmNodeId(scmStorageConfig.getScmId()); scmStorageConfig.initialize(); + + // Create Ratis snapshot directory during init. + // This must be done before SCM starts normally since SCMSnapshotProvider + // expects the directory to already exist. + String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); + HddsUtils.createDir(snapshotDir); + LOG.info("Init: created Ratis snapshot directory: {}", snapshotDir); + scmStorageConfig = initializeRatis(conf); LOG.info("SCM initialization succeeded. Current cluster id for sd={}" @@ -1351,12 +1377,6 @@ private static SCMStorageConfig initializeRatis(OzoneConfiguration conf) SCMRatisServerImpl.initialize(storageConfig.getClusterID(), storageConfig.getScmId(), haDetails.getLocalNodeDetails(), conf); - // Create Ratis snapshot directory during initialization - // This ensures the directory exists before SCM starts normally - String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); - HddsUtils.createDir(snapshotDir); - LOG.info("Created Ratis snapshot directory: {}", snapshotDir); - storageConfig.setSCMHAFlag(true); storageConfig.setPrimaryScmNodeId(storageConfig.getScmId()); storageConfig.forceInitialize(); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMSnapshotProvider.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMSnapshotProvider.java index bedb308d9cbf..fd5e35f48e6b 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMSnapshotProvider.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMSnapshotProvider.java @@ -24,7 +24,6 @@ import static org.mockito.Mockito.mock; import java.io.File; -import java.io.IOException; import java.nio.file.Path; import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.OzoneConfiguration; @@ -34,17 +33,15 @@ import org.junit.jupiter.api.io.TempDir; /** - * Test cases for {@link SCMSnapshotProvider} with different startup modes. + * Test cases for {@link SCMSnapshotProvider}. */ class TestSCMSnapshotProvider { /** - * Test 1: Default constructor (no startup option parameter). - * Should use NORMAL mode by default. - * Expects both Ratis storage and snapshot directories to exist. + * Test constructor succeeds when both Ratis storage and snapshot directories exist. */ @Test - void testDefaultConstructor(@TempDir Path tempDir) throws IOException { + void testConstructorSucceedsWithExistingDirectories(@TempDir Path tempDir) { // Setup: Create both directories (simulating initialized SCM) OzoneConfiguration conf = createConfig(tempDir); String ratisDir = SCMHAUtils.getSCMRatisDirectory(conf); @@ -55,11 +52,11 @@ void testDefaultConstructor(@TempDir Path tempDir) throws IOException { CertificateClient certClient = mock(CertificateClient.class); - // Test: Use default constructor (should use NORMAL mode) + // Test SCMSnapshotProvider provider = new SCMSnapshotProvider( conf, null, certClient); - // Verify: Provider created successfully + // Verify assertNotNull(provider); assertNotNull(provider.getScmSnapshotDir()); assertTrue(provider.getScmSnapshotDir().exists()); @@ -67,11 +64,10 @@ void testDefaultConstructor(@TempDir Path tempDir) throws IOException { } /** - * Test 2: Default constructor when snapshot directory does not exist. - * Should fail because NORMAL mode expects directory to exist. + * Test constructor fails when snapshot directory does not exist. */ @Test - void testDefaultConstructorFailsWhenSnapshotDirMissing(@TempDir Path tempDir) { + void testConstructorFailsWhenSnapshotDirMissing(@TempDir Path tempDir) { // Setup: Create only Ratis storage directory, not snapshot directory OzoneConfiguration conf = createConfig(tempDir); String ratisDir = SCMHAUtils.getSCMRatisDirectory(conf); @@ -79,7 +75,7 @@ void testDefaultConstructorFailsWhenSnapshotDirMissing(@TempDir Path tempDir) { CertificateClient certClient = mock(CertificateClient.class); - // Test & Verify: Should throw exception + // Test & Verify IllegalStateException exception = assertThrows( IllegalStateException.class, () -> new SCMSnapshotProvider(conf, null, certClient)); @@ -88,142 +84,18 @@ void testDefaultConstructorFailsWhenSnapshotDirMissing(@TempDir Path tempDir) { } /** - * Test 3: Explicit NORMAL mode with existing directories. - * Should succeed when both directories exist. + * Test constructor fails when Ratis storage directory does not exist. */ @Test - void testNormalModeWithExistingDirectories(@TempDir Path tempDir) throws IOException { - // Setup: Create both directories - OzoneConfiguration conf = createConfig(tempDir); - String ratisDir = SCMHAUtils.getSCMRatisDirectory(conf); - String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); - - assertTrue(new File(ratisDir).mkdirs()); - assertTrue(new File(snapshotDir).mkdirs()); - - CertificateClient certClient = mock(CertificateClient.class); - - // Test: Explicitly use NORMAL mode - SCMSnapshotProvider provider = new SCMSnapshotProvider( - conf, null, certClient, SCMSnapshotProvider.StartupOption.NORMAL); - - // Verify: Provider created successfully - assertNotNull(provider); - assertNotNull(provider.getScmSnapshotDir()); - assertTrue(provider.getScmSnapshotDir().exists()); - } - - /** - * Test 4: Explicit NORMAL mode when snapshot directory missing. - * Should fail with clear error message. - */ - @Test - void testNormalModeFailsWhenSnapshotDirMissing(@TempDir Path tempDir) { - // Setup: Create only Ratis storage directory - OzoneConfiguration conf = createConfig(tempDir); - String ratisDir = SCMHAUtils.getSCMRatisDirectory(conf); - assertTrue(new File(ratisDir).mkdirs()); - - CertificateClient certClient = mock(CertificateClient.class); - - // Test & Verify: Should throw exception - IllegalStateException exception = assertThrows( - IllegalStateException.class, - () -> new SCMSnapshotProvider( - conf, null, certClient, SCMSnapshotProvider.StartupOption.NORMAL)); - - assertTrue(exception.getMessage().contains("Ratis snapshot directory does not exist")); - assertTrue(exception.getMessage().contains("Please run 'ozone scm --init' first")); - } - - /** - * Test 5: Explicit NORMAL mode when Ratis storage directory missing. - * Should fail because SCMRatisServerImpl should have created it. - */ - @Test - void testNormalModeFailsWhenRatisDirMissing(@TempDir Path tempDir) { + void testConstructorFailsWhenRatisDirMissing(@TempDir Path tempDir) { // Setup: Don't create any directories OzoneConfiguration conf = createConfig(tempDir); CertificateClient certClient = mock(CertificateClient.class); - // Test & Verify: Should throw exception - IllegalStateException exception = assertThrows( - IllegalStateException.class, - () -> new SCMSnapshotProvider( - conf, null, certClient, SCMSnapshotProvider.StartupOption.NORMAL)); - - assertTrue(exception.getMessage().contains("Ratis storage directory does not exist")); - assertTrue(exception.getMessage().contains("SCMRatisServerImpl.initialize()")); - } - - /** - * Test 6: FORMAT mode creates new snapshot directory. - * Should succeed when snapshot directory does not exist. - */ - @Test - void testFormatModeCreatesNewDirectory(@TempDir Path tempDir) throws IOException { - // Setup: Create only Ratis storage directory (as SCMRatisServerImpl would) - OzoneConfiguration conf = createConfig(tempDir); - String ratisDir = SCMHAUtils.getSCMRatisDirectory(conf); - String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); - - assertTrue(new File(ratisDir).mkdirs()); - // Snapshot directory should NOT exist for FORMAT mode - - CertificateClient certClient = mock(CertificateClient.class); - - // Test: Use FORMAT mode - SCMSnapshotProvider provider = new SCMSnapshotProvider( - conf, null, certClient, SCMSnapshotProvider.StartupOption.FORMAT); - - // Verify: Provider created and snapshot directory was created - assertNotNull(provider); - assertNotNull(provider.getScmSnapshotDir()); - assertTrue(provider.getScmSnapshotDir().exists()); - assertEquals(snapshotDir, provider.getScmSnapshotDir().getAbsolutePath()); - } - - /** - * Test 7: FORMAT mode fails when snapshot directory already exists. - * Should prevent accidental data loss. - */ - @Test - void testFormatModeFailsWhenSnapshotDirExists(@TempDir Path tempDir) { - // Setup: Create both directories (simulating existing SCM) - OzoneConfiguration conf = createConfig(tempDir); - String ratisDir = SCMHAUtils.getSCMRatisDirectory(conf); - String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); - - assertTrue(new File(ratisDir).mkdirs()); - assertTrue(new File(snapshotDir).mkdirs()); // This exists! - - CertificateClient certClient = mock(CertificateClient.class); - - // Test & Verify: FORMAT should fail when directory exists - IllegalStateException exception = assertThrows( - IllegalStateException.class, - () -> new SCMSnapshotProvider( - conf, null, certClient, SCMSnapshotProvider.StartupOption.FORMAT)); - - assertTrue(exception.getMessage().contains("Cannot format")); - assertTrue(exception.getMessage().contains("already exists")); - } - - /** - * Test 8: FORMAT mode fails when Ratis storage directory missing. - * Even in FORMAT mode, Ratis storage dir should exist (created by SCMRatisServerImpl). - */ - @Test - void testFormatModeFailsWhenRatisDirMissing(@TempDir Path tempDir) { - // Setup: Don't create Ratis storage directory - OzoneConfiguration conf = createConfig(tempDir); - CertificateClient certClient = mock(CertificateClient.class); - - // Test & Verify: Should throw exception + // Test & Verify IllegalStateException exception = assertThrows( IllegalStateException.class, - () -> new SCMSnapshotProvider( - conf, null, certClient, SCMSnapshotProvider.StartupOption.FORMAT)); + () -> new SCMSnapshotProvider(conf, null, certClient)); assertTrue(exception.getMessage().contains("Ratis storage directory does not exist")); } diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-repair-tools.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-repair-tools.sh index df92e93c59cc..ca6fa5a0cbd8 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-repair-tools.sh +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-repair-tools.sh @@ -98,14 +98,5 @@ echo "Test keys created" echo "Restarting OM after key creation to flush and generate sst files" docker restart "${om_container}" -wait_for_om_leader - -echo "Deleting test keys to create tombstones" -docker exec "${om_container}" bash -c "kinit -kt /etc/security/keytabs/testuser.keytab testuser/om@EXAMPLE.COM && ozone fs -rm -R -skipTrash ofs://omservice/vol1/bucket1" -echo "Test keys deleted" - -echo "Restarting OM after key deletion to flush tombstones to sst files" -docker restart "${om_container}" -wait_for_om_leader execute_robot_test ${OM} repair/om-compact.robot diff --git a/hadoop-ozone/dist/src/main/smoketest/repair/om-compact.robot b/hadoop-ozone/dist/src/main/smoketest/repair/om-compact.robot index 782304ed40f7..164cf410dcbc 100644 --- a/hadoop-ozone/dist/src/main/smoketest/repair/om-compact.robot +++ b/hadoop-ozone/dist/src/main/smoketest/repair/om-compact.robot @@ -36,20 +36,19 @@ Compact OM DB Column Family [Arguments] ${column_family} Execute ozone repair om compact --cf=${column_family} --service-id ${OM_SERVICE_ID} --node-id om1 -SST Size Should Be Reduced - [Arguments] ${original_size} - ${current_size} = Get OM DB SST Files Size - Should Be True ${current_size} < ${original_size} - ... OM DB size should be reduced after compaction. Before: ${original_size}, After: ${current_size} - *** Test Cases *** Testing OM DB Size Reduction After Compaction - # Test keys are already created, deleted and flushed by the test script + # Test keys are already created and flushed + # Delete keys to create tombstones that need compaction + Delete Test Keys + ${size_before_compaction} = Get OM DB SST Files Size Compact OM DB Column Family fileTable Compact OM DB Column Family deletedTable Compact OM DB Column Family deletedDirectoryTable + + ${size_after_compaction} = Get OM DB SST Files Size - # Compaction is asynchronous, poll until size is reduced - Wait Until Keyword Succeeds 60sec 5sec SST Size Should Be Reduced ${size_before_compaction} + Should Be True ${size_after_compaction} < ${size_before_compaction} + ... OM DB size should be reduced after compaction. Before: ${size_before_compaction}, After: ${size_after_compaction} diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java index 8af00a8b0ede..7afaa10c35b0 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java @@ -94,7 +94,6 @@ import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.ha.RatisUtil; import org.apache.hadoop.hdds.scm.ha.SCMContext; -import org.apache.hadoop.hdds.scm.ha.SCMHANodeDetails; import org.apache.hadoop.hdds.scm.ha.SCMHAUtils; import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl; import org.apache.hadoop.hdds.scm.node.DatanodeInfo; @@ -603,17 +602,11 @@ public void testScmInfo(@TempDir Path tempDir) throws Exception { Path scmPath = tempDir.resolve("scm-meta"); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, scmPath.toString()); - SCMStorageConfig scmStore = new SCMStorageConfig(conf); String clusterId = UUID.randomUUID().toString(); - String scmId = UUID.randomUUID().toString(); - scmStore.setClusterId(clusterId); - scmStore.setScmId(scmId); - scmStore.setSCMHAFlag(true); - // writes the version file properties - scmStore.initialize(); - SCMRatisServerImpl.initialize(clusterId, scmId, - SCMHANodeDetails.loadSCMHAConfig(conf, scmStore) - .getLocalNodeDetails(), conf); + // Use scmInit to initialize SCM properly (creates all required directories) + StorageContainerManager.scmInit(conf, clusterId); + SCMStorageConfig scmStore = new SCMStorageConfig(conf); + String scmId = scmStore.getScmId(); StorageContainerManager scm = HddsTestUtils.getScmSimple(conf); try { scm.start(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java index 35c527180bf3..9b11ed744dbc 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java @@ -105,9 +105,6 @@ import org.apache.hadoop.hdds.scm.ScmConfig; import org.apache.hadoop.hdds.scm.ScmInfo; import org.apache.hadoop.hdds.scm.client.ScmTopologyClient; -import org.apache.hadoop.hdds.scm.ha.HASecurityUtils; -import org.apache.hadoop.hdds.scm.ha.SCMHANodeDetails; -import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl; import org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol; import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol; import org.apache.hadoop.hdds.scm.server.SCMHTTPServerConfig; @@ -444,18 +441,10 @@ private void initSCM() throws IOException { Files.createDirectories(scmPath); conf.set(OZONE_METADATA_DIRS, scmPath.toString()); + // Use scmInit to properly initialize SCM with all required directories + StorageContainerManager.scmInit(conf, clusterId); SCMStorageConfig scmStore = new SCMStorageConfig(conf); - scmStore.setClusterId(clusterId); - scmStore.setScmId(scmId); - scmStore.setSCMHAFlag(true); - HASecurityUtils.initializeSecurity(scmStore, conf, - InetAddress.getLocalHost().getHostName(), true); - scmStore.setPrimaryScmNodeId(scmId); - // writes the version file properties - scmStore.initialize(); - SCMRatisServerImpl.initialize(clusterId, scmId, - SCMHANodeDetails.loadSCMHAConfig(conf, scmStore) - .getLocalNodeDetails(), conf); + scmId = scmStore.getScmId(); /* * As all these processes run inside the same JVM, there are issues around diff --git a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java index a147a53bdf41..dae0d4a48225 100644 --- a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java +++ b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java @@ -40,6 +40,7 @@ import java.util.concurrent.TimeoutException; import org.apache.commons.io.FileUtils; import org.apache.hadoop.hdds.HddsConfigKeys; +import org.apache.hadoop.hdds.HddsUtils; import org.apache.hadoop.hdds.annotation.InterfaceAudience; import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; @@ -48,6 +49,7 @@ import org.apache.hadoop.hdds.scm.HddsTestUtils; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.ha.SCMHANodeDetails; +import org.apache.hadoop.hdds.scm.ha.SCMHAUtils; import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl; import org.apache.hadoop.hdds.scm.node.NodeStatus; import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; @@ -621,6 +623,9 @@ protected void initializeScmStorage(SCMStorageConfig scmStore) SCMHANodeDetails.loadSCMHAConfig(conf, scmStore) .getLocalNodeDetails(), conf); + // Create Ratis snapshot directory for test environment + String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); + HddsUtils.createDir(snapshotDir); } void initializeOmStorage(OMStorage omStorage) throws IOException {