diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAManagerImpl.java index 0cc600160e11..d9fffeec974c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAManagerImpl.java @@ -95,8 +95,10 @@ public SCMHAManagerImpl(final ConfigurationSource conf, } @VisibleForTesting - protected SCMSnapshotProvider newScmSnapshotProvider(StorageContainerManager storageContainerManager) { - return new SCMSnapshotProvider(storageContainerManager.getConfiguration(), + protected SCMSnapshotProvider newScmSnapshotProvider( + StorageContainerManager storageContainerManager) { + return new SCMSnapshotProvider( + storageContainerManager.getConfiguration(), storageContainerManager.getSCMHANodeDetails().getPeerNodeDetails(), storageContainerManager.getScmCertificateClient()); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMSnapshotProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMSnapshotProvider.java index ac960d5c1c7e..17f8259d76b0 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMSnapshotProvider.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMSnapshotProvider.java @@ -20,6 +20,7 @@ import com.google.common.annotations.VisibleForTesting; import java.io.File; import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.HashMap; @@ -29,7 +30,6 @@ import org.apache.commons.io.FileUtils; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdds.HddsConfigKeys; -import org.apache.hadoop.hdds.HddsUtils; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; import org.apache.hadoop.hdds.utils.db.DBCheckpoint; @@ -61,18 +61,40 @@ public SCMSnapshotProvider(ConfigurationSource conf, LOG.info("Initializing SCM Snapshot Provider"); this.conf = conf; this.scmCertificateClient = scmCertificateClient; - // Create Ratis storage dir + + // Get directory paths from configuration String scmRatisDirectory = SCMHAUtils.getSCMRatisDirectory(conf); + String scmSnapshotDirectory = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); if (scmRatisDirectory == null || scmRatisDirectory.isEmpty()) { throw new IllegalArgumentException(HddsConfigKeys.OZONE_METADATA_DIRS + " must be defined."); } - HddsUtils.createDir(scmRatisDirectory); - // Create Ratis snapshot dir - scmSnapshotDir = HddsUtils.createDir( - SCMHAUtils.getSCMRatisSnapshotDirectory(conf)); + if (scmSnapshotDirectory == null || scmSnapshotDirectory.isEmpty()) { + throw new IllegalArgumentException("SCM Ratis snapshot directory must be defined."); + } + + // Ratis storage directory should already be created by SCMRatisServerImpl + // or by init/bootstrap commands. SCMSnapshotProvider is NOT responsible for creating it. + Path ratisPath = Paths.get(scmRatisDirectory); + if (!Files.isDirectory(ratisPath)) { + throw new IllegalStateException( + "Ratis storage directory does not exist: " + scmRatisDirectory + + ". Please run 'ozone scm --init' or 'ozone scm --bootstrap' first."); + } + + // Ratis snapshot directory should already exist (created by --init or --bootstrap) + Path snapshotPath = Paths.get(scmSnapshotDirectory); + if (!Files.isDirectory(snapshotPath)) { + throw new IllegalStateException( + "Ratis snapshot directory does not exist: " + scmSnapshotDirectory + + ". Please run 'ozone scm --init' or 'ozone scm --bootstrap' first."); + } + this.scmSnapshotDir = snapshotPath.toFile(); + LOG.info("Using Ratis snapshot directory at {}", scmSnapshotDirectory); + + // Initialize peer nodes map if (peerNodes != null) { this.peerNodesMap = new HashMap<>(); for (SCMNodeDetails peerNode : peerNodes) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index bb6d59e0d054..e8eaa21eb4f1 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -32,6 +32,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.protobuf.BlockingService; +import java.io.File; import java.io.IOException; import java.math.BigInteger; import java.net.InetAddress; @@ -374,8 +375,25 @@ private StorageContainerManager(OzoneConfiguration conf, // This is for the clusters which got upgraded from older version of Ozone. // We enable Ratis by default. if (!scmStorageConfig.isSCMHAEnabled()) { + // Create Ratis snapshot directory for upgrade scenario. + // This must be done before initializeRatis() since SCMSnapshotProvider + // expects the directory to already exist during normal startup. + String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); + HddsUtils.createDir(snapshotDir); + LOG.info("Upgrade: created Ratis snapshot directory: {}", snapshotDir); + // Since we have initialized Ratis, we have to reload StorageConfig scmStorageConfig = initializeRatis(conf); + } else { + // For clusters upgraded from older versions that already have SCMHAEnabled + // but may not have the snapshot directory (it was auto-created by + // SCMSnapshotProvider in older versions), ensure the directory exists. + String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); + File snapshotDirFile = new File(snapshotDir); + if (!snapshotDirFile.exists()) { + HddsUtils.createDir(snapshotDir); + LOG.info("Created missing Ratis snapshot directory for upgraded cluster: {}", snapshotDir); + } } threadNamePrefix = getScmNodeDetails().threadNamePrefix(); @@ -1210,6 +1228,16 @@ public static boolean scmBootstrap(OzoneConfiguration conf) scmStorageConfig.setPrimaryScmNodeId(scmInfo.getScmId()); scmStorageConfig.setSCMHAFlag(true); scmStorageConfig.initialize(); + + // Create Ratis storage and snapshot directories during bootstrap + // This ensures they exist before the bootstrapped SCM starts normally + String ratisStorageDir = SCMHAUtils.getSCMRatisDirectory(conf); + String ratisSnapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); + HddsUtils.createDir(ratisStorageDir); + HddsUtils.createDir(ratisSnapshotDir); + LOG.info("Created Ratis storage directory: {}", ratisStorageDir); + LOG.info("Created Ratis snapshot directory: {}", ratisSnapshotDir); + LOG.info("SCM BootStrap is successful for ClusterID {}, SCMID {}", scmInfo.getClusterId(), scmStorageConfig.getScmId()); LOG.info("Primary SCM Node ID {}", @@ -1290,6 +1318,14 @@ public static boolean scmInit(OzoneConfiguration conf, scmStorageConfig.setPrimaryScmNodeId(scmStorageConfig.getScmId()); scmStorageConfig.initialize(); + + // Create Ratis snapshot directory during init. + // This must be done before SCM starts normally since SCMSnapshotProvider + // expects the directory to already exist. + String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); + HddsUtils.createDir(snapshotDir); + LOG.info("Init: created Ratis snapshot directory: {}", snapshotDir); + scmStorageConfig = initializeRatis(conf); LOG.info("SCM initialization succeeded. Current cluster id for sd={}" @@ -1340,6 +1376,7 @@ private static SCMStorageConfig initializeRatis(OzoneConfiguration conf) final SCMHANodeDetails haDetails = SCMHANodeDetails.loadSCMHAConfig(conf, storageConfig); SCMRatisServerImpl.initialize(storageConfig.getClusterID(), storageConfig.getScmId(), haDetails.getLocalNodeDetails(), conf); + storageConfig.setSCMHAFlag(true); storageConfig.setPrimaryScmNodeId(storageConfig.getScmId()); storageConfig.forceInitialize(); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMSnapshotProvider.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMSnapshotProvider.java new file mode 100644 index 000000000000..fd5e35f48e6b --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMSnapshotProvider.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.ha; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; + +import java.io.File; +import java.nio.file.Path; +import org.apache.hadoop.hdds.HddsConfigKeys; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +/** + * Test cases for {@link SCMSnapshotProvider}. + */ +class TestSCMSnapshotProvider { + + /** + * Test constructor succeeds when both Ratis storage and snapshot directories exist. + */ + @Test + void testConstructorSucceedsWithExistingDirectories(@TempDir Path tempDir) { + // Setup: Create both directories (simulating initialized SCM) + OzoneConfiguration conf = createConfig(tempDir); + String ratisDir = SCMHAUtils.getSCMRatisDirectory(conf); + String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); + + assertTrue(new File(ratisDir).mkdirs()); + assertTrue(new File(snapshotDir).mkdirs()); + + CertificateClient certClient = mock(CertificateClient.class); + + // Test + SCMSnapshotProvider provider = new SCMSnapshotProvider( + conf, null, certClient); + + // Verify + assertNotNull(provider); + assertNotNull(provider.getScmSnapshotDir()); + assertTrue(provider.getScmSnapshotDir().exists()); + assertEquals(snapshotDir, provider.getScmSnapshotDir().getAbsolutePath()); + } + + /** + * Test constructor fails when snapshot directory does not exist. + */ + @Test + void testConstructorFailsWhenSnapshotDirMissing(@TempDir Path tempDir) { + // Setup: Create only Ratis storage directory, not snapshot directory + OzoneConfiguration conf = createConfig(tempDir); + String ratisDir = SCMHAUtils.getSCMRatisDirectory(conf); + assertTrue(new File(ratisDir).mkdirs()); + + CertificateClient certClient = mock(CertificateClient.class); + + // Test & Verify + IllegalStateException exception = assertThrows( + IllegalStateException.class, + () -> new SCMSnapshotProvider(conf, null, certClient)); + + assertTrue(exception.getMessage().contains("Ratis snapshot directory does not exist")); + } + + /** + * Test constructor fails when Ratis storage directory does not exist. + */ + @Test + void testConstructorFailsWhenRatisDirMissing(@TempDir Path tempDir) { + // Setup: Don't create any directories + OzoneConfiguration conf = createConfig(tempDir); + CertificateClient certClient = mock(CertificateClient.class); + + // Test & Verify + IllegalStateException exception = assertThrows( + IllegalStateException.class, + () -> new SCMSnapshotProvider(conf, null, certClient)); + + assertTrue(exception.getMessage().contains("Ratis storage directory does not exist")); + } + + /** + * Helper method to create OzoneConfiguration with temp directories. + */ + private OzoneConfiguration createConfig(Path tempDir) { + OzoneConfiguration conf = new OzoneConfiguration(); + + String metadataDir = tempDir.resolve("metadata").toString(); + String ratisStorageDir = tempDir.resolve("ratis").toString(); + String ratisSnapshotDir = tempDir.resolve("ratis-snapshot").toString(); + + conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, metadataDir); + conf.set(ScmConfigKeys.OZONE_SCM_HA_RATIS_STORAGE_DIR, ratisStorageDir); + conf.set(ScmConfigKeys.OZONE_SCM_HA_RATIS_SNAPSHOT_DIR, ratisSnapshotDir); + + return conf; + } +} diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestScmStartupSlvLessThanMlv.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestScmStartupSlvLessThanMlv.java index 984306fb8a42..25de21766947 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestScmStartupSlvLessThanMlv.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/upgrade/TestScmStartupSlvLessThanMlv.java @@ -53,6 +53,13 @@ public void testStartupSlvLessThanMlv(@TempDir Path tempDir) File scmSubdir = tempDir.resolve("scm").resolve("current").toFile(); assertTrue(scmSubdir.mkdirs()); + // Create Ratis directories to simulate a realistic downgrade scenario + // where SCM was previously running with a newer version + File ratisDir = tempDir.resolve("scm.ratis").toFile(); + File snapshotDir = tempDir.resolve("scm.ratis.snapshot").toFile(); + assertTrue(ratisDir.mkdirs()); + assertTrue(snapshotDir.mkdirs()); + OzoneConfiguration conf = new OzoneConfiguration(); conf.set(ScmConfigKeys.OZONE_SCM_DB_DIRS, tempDir.toAbsolutePath().toString()); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java index 8af00a8b0ede..7afaa10c35b0 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java @@ -94,7 +94,6 @@ import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.ha.RatisUtil; import org.apache.hadoop.hdds.scm.ha.SCMContext; -import org.apache.hadoop.hdds.scm.ha.SCMHANodeDetails; import org.apache.hadoop.hdds.scm.ha.SCMHAUtils; import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl; import org.apache.hadoop.hdds.scm.node.DatanodeInfo; @@ -603,17 +602,11 @@ public void testScmInfo(@TempDir Path tempDir) throws Exception { Path scmPath = tempDir.resolve("scm-meta"); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, scmPath.toString()); - SCMStorageConfig scmStore = new SCMStorageConfig(conf); String clusterId = UUID.randomUUID().toString(); - String scmId = UUID.randomUUID().toString(); - scmStore.setClusterId(clusterId); - scmStore.setScmId(scmId); - scmStore.setSCMHAFlag(true); - // writes the version file properties - scmStore.initialize(); - SCMRatisServerImpl.initialize(clusterId, scmId, - SCMHANodeDetails.loadSCMHAConfig(conf, scmStore) - .getLocalNodeDetails(), conf); + // Use scmInit to initialize SCM properly (creates all required directories) + StorageContainerManager.scmInit(conf, clusterId); + SCMStorageConfig scmStore = new SCMStorageConfig(conf); + String scmId = scmStore.getScmId(); StorageContainerManager scm = HddsTestUtils.getScmSimple(conf); try { scm.start(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java index 35c527180bf3..9b11ed744dbc 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestSecureOzoneCluster.java @@ -105,9 +105,6 @@ import org.apache.hadoop.hdds.scm.ScmConfig; import org.apache.hadoop.hdds.scm.ScmInfo; import org.apache.hadoop.hdds.scm.client.ScmTopologyClient; -import org.apache.hadoop.hdds.scm.ha.HASecurityUtils; -import org.apache.hadoop.hdds.scm.ha.SCMHANodeDetails; -import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl; import org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol; import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol; import org.apache.hadoop.hdds.scm.server.SCMHTTPServerConfig; @@ -444,18 +441,10 @@ private void initSCM() throws IOException { Files.createDirectories(scmPath); conf.set(OZONE_METADATA_DIRS, scmPath.toString()); + // Use scmInit to properly initialize SCM with all required directories + StorageContainerManager.scmInit(conf, clusterId); SCMStorageConfig scmStore = new SCMStorageConfig(conf); - scmStore.setClusterId(clusterId); - scmStore.setScmId(scmId); - scmStore.setSCMHAFlag(true); - HASecurityUtils.initializeSecurity(scmStore, conf, - InetAddress.getLocalHost().getHostName(), true); - scmStore.setPrimaryScmNodeId(scmId); - // writes the version file properties - scmStore.initialize(); - SCMRatisServerImpl.initialize(clusterId, scmId, - SCMHANodeDetails.loadSCMHAConfig(conf, scmStore) - .getLocalNodeDetails(), conf); + scmId = scmStore.getScmId(); /* * As all these processes run inside the same JVM, there are issues around diff --git a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java index a147a53bdf41..dae0d4a48225 100644 --- a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java +++ b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java @@ -40,6 +40,7 @@ import java.util.concurrent.TimeoutException; import org.apache.commons.io.FileUtils; import org.apache.hadoop.hdds.HddsConfigKeys; +import org.apache.hadoop.hdds.HddsUtils; import org.apache.hadoop.hdds.annotation.InterfaceAudience; import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; @@ -48,6 +49,7 @@ import org.apache.hadoop.hdds.scm.HddsTestUtils; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.ha.SCMHANodeDetails; +import org.apache.hadoop.hdds.scm.ha.SCMHAUtils; import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl; import org.apache.hadoop.hdds.scm.node.NodeStatus; import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; @@ -621,6 +623,9 @@ protected void initializeScmStorage(SCMStorageConfig scmStore) SCMHANodeDetails.loadSCMHAConfig(conf, scmStore) .getLocalNodeDetails(), conf); + // Create Ratis snapshot directory for test environment + String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf); + HddsUtils.createDir(snapshotDir); } void initializeOmStorage(OMStorage omStorage) throws IOException {