Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,10 @@ public SCMHAManagerImpl(final ConfigurationSource conf,
}

@VisibleForTesting
protected SCMSnapshotProvider newScmSnapshotProvider(StorageContainerManager storageContainerManager) {
return new SCMSnapshotProvider(storageContainerManager.getConfiguration(),
protected SCMSnapshotProvider newScmSnapshotProvider(
StorageContainerManager storageContainerManager) {
return new SCMSnapshotProvider(
storageContainerManager.getConfiguration(),
Comment on lines -98 to +101
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has only whitespace changes. Please revert them.

storageContainerManager.getSCMHANodeDetails().getPeerNodeDetails(),
storageContainerManager.getScmCertificateClient());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import com.google.common.annotations.VisibleForTesting;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
Expand All @@ -29,7 +30,6 @@
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.HddsUtils;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient;
import org.apache.hadoop.hdds.utils.db.DBCheckpoint;
Expand Down Expand Up @@ -61,18 +61,40 @@ public SCMSnapshotProvider(ConfigurationSource conf,
LOG.info("Initializing SCM Snapshot Provider");
this.conf = conf;
this.scmCertificateClient = scmCertificateClient;
// Create Ratis storage dir

// Get directory paths from configuration
String scmRatisDirectory = SCMHAUtils.getSCMRatisDirectory(conf);
String scmSnapshotDirectory = SCMHAUtils.getSCMRatisSnapshotDirectory(conf);

if (scmRatisDirectory == null || scmRatisDirectory.isEmpty()) {
throw new IllegalArgumentException(HddsConfigKeys.OZONE_METADATA_DIRS +
" must be defined.");
}
HddsUtils.createDir(scmRatisDirectory);

// Create Ratis snapshot dir
scmSnapshotDir = HddsUtils.createDir(
SCMHAUtils.getSCMRatisSnapshotDirectory(conf));
if (scmSnapshotDirectory == null || scmSnapshotDirectory.isEmpty()) {
throw new IllegalArgumentException("SCM Ratis snapshot directory must be defined.");
}

// Ratis storage directory should already be created by SCMRatisServerImpl
// or by init/bootstrap commands. SCMSnapshotProvider is NOT responsible for creating it.
Path ratisPath = Paths.get(scmRatisDirectory);
if (!Files.isDirectory(ratisPath)) {
throw new IllegalStateException(
"Ratis storage directory does not exist: " + scmRatisDirectory
+ ". Please run 'ozone scm --init' or 'ozone scm --bootstrap' first.");
}

// Ratis snapshot directory should already exist (created by --init or --bootstrap)
Path snapshotPath = Paths.get(scmSnapshotDirectory);
if (!Files.isDirectory(snapshotPath)) {
throw new IllegalStateException(
"Ratis snapshot directory does not exist: " + scmSnapshotDirectory
+ ". Please run 'ozone scm --init' or 'ozone scm --bootstrap' first.");
}
this.scmSnapshotDir = snapshotPath.toFile();
LOG.info("Using Ratis snapshot directory at {}", scmSnapshotDirectory);

// Initialize peer nodes map
if (peerNodes != null) {
this.peerNodesMap = new HashMap<>();
for (SCMNodeDetails peerNode : peerNodes) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.protobuf.BlockingService;
import java.io.File;
import java.io.IOException;
import java.math.BigInteger;
import java.net.InetAddress;
Expand Down Expand Up @@ -374,8 +375,25 @@ private StorageContainerManager(OzoneConfiguration conf,
// This is for the clusters which got upgraded from older version of Ozone.
// We enable Ratis by default.
if (!scmStorageConfig.isSCMHAEnabled()) {
// Create Ratis snapshot directory for upgrade scenario.
// This must be done before initializeRatis() since SCMSnapshotProvider
// expects the directory to already exist during normal startup.
String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf);
HddsUtils.createDir(snapshotDir);
LOG.info("Upgrade: created Ratis snapshot directory: {}", snapshotDir);

// Since we have initialized Ratis, we have to reload StorageConfig
scmStorageConfig = initializeRatis(conf);
} else {
// For clusters upgraded from older versions that already have SCMHAEnabled
// but may not have the snapshot directory (it was auto-created by
Comment on lines +388 to +389
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How could older versions with HA do not have the snapshot directory?

I guess it is possible only if older versions is non-HA, then it does not have the snapshot directory. Further, if it is updated to a newer version and enable HA at the same time, it could hit this case. Is it the case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Integration Test (Upgrade) covers this scenario. See driver.sh:

  1. Start old version 2.1.0 (HA cluster, SCMHAEnabled=true)
  2. Stop → switch to new version → start

In step 2, the cluster already has SCMHAEnabled=true from the old version, so it enters the else branch. The snapshot directory may not exist if the old version didn't trigger a snapshot operation before shutdown.

// SCMSnapshotProvider in older versions), ensure the directory exists.
String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf);
File snapshotDirFile = new File(snapshotDir);
if (!snapshotDirFile.exists()) {
HddsUtils.createDir(snapshotDir);
LOG.info("Created missing Ratis snapshot directory for upgraded cluster: {}", snapshotDir);
}
}

threadNamePrefix = getScmNodeDetails().threadNamePrefix();
Expand Down Expand Up @@ -1210,6 +1228,16 @@ public static boolean scmBootstrap(OzoneConfiguration conf)
scmStorageConfig.setPrimaryScmNodeId(scmInfo.getScmId());
scmStorageConfig.setSCMHAFlag(true);
scmStorageConfig.initialize();

// Create Ratis storage and snapshot directories during bootstrap
// This ensures they exist before the bootstrapped SCM starts normally
String ratisStorageDir = SCMHAUtils.getSCMRatisDirectory(conf);
String ratisSnapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf);
HddsUtils.createDir(ratisStorageDir);
HddsUtils.createDir(ratisSnapshotDir);
LOG.info("Created Ratis storage directory: {}", ratisStorageDir);
LOG.info("Created Ratis snapshot directory: {}", ratisSnapshotDir);

LOG.info("SCM BootStrap is successful for ClusterID {}, SCMID {}",
scmInfo.getClusterId(), scmStorageConfig.getScmId());
LOG.info("Primary SCM Node ID {}",
Expand Down Expand Up @@ -1290,6 +1318,14 @@ public static boolean scmInit(OzoneConfiguration conf,

scmStorageConfig.setPrimaryScmNodeId(scmStorageConfig.getScmId());
scmStorageConfig.initialize();

// Create Ratis snapshot directory during init.
// This must be done before SCM starts normally since SCMSnapshotProvider
// expects the directory to already exist.
String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf);
HddsUtils.createDir(snapshotDir);
LOG.info("Init: created Ratis snapshot directory: {}", snapshotDir);

scmStorageConfig = initializeRatis(conf);

LOG.info("SCM initialization succeeded. Current cluster id for sd={}"
Expand Down Expand Up @@ -1340,6 +1376,7 @@ private static SCMStorageConfig initializeRatis(OzoneConfiguration conf)
final SCMHANodeDetails haDetails = SCMHANodeDetails.loadSCMHAConfig(conf, storageConfig);
SCMRatisServerImpl.initialize(storageConfig.getClusterID(),
storageConfig.getScmId(), haDetails.getLocalNodeDetails(), conf);

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please revert this whitespace change..

storageConfig.setSCMHAFlag(true);
storageConfig.setPrimaryScmNodeId(storageConfig.getScmId());
storageConfig.forceInitialize();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hdds.scm.ha;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;

import java.io.File;
import java.nio.file.Path;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

/**
* Test cases for {@link SCMSnapshotProvider}.
*/
class TestSCMSnapshotProvider {

/**
* Test constructor succeeds when both Ratis storage and snapshot directories exist.
*/
@Test
void testConstructorSucceedsWithExistingDirectories(@TempDir Path tempDir) {
// Setup: Create both directories (simulating initialized SCM)
OzoneConfiguration conf = createConfig(tempDir);
String ratisDir = SCMHAUtils.getSCMRatisDirectory(conf);
String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf);

assertTrue(new File(ratisDir).mkdirs());
assertTrue(new File(snapshotDir).mkdirs());

CertificateClient certClient = mock(CertificateClient.class);

// Test
SCMSnapshotProvider provider = new SCMSnapshotProvider(
conf, null, certClient);

// Verify
assertNotNull(provider);
assertNotNull(provider.getScmSnapshotDir());
assertTrue(provider.getScmSnapshotDir().exists());
assertEquals(snapshotDir, provider.getScmSnapshotDir().getAbsolutePath());
}

/**
* Test constructor fails when snapshot directory does not exist.
*/
@Test
void testConstructorFailsWhenSnapshotDirMissing(@TempDir Path tempDir) {
// Setup: Create only Ratis storage directory, not snapshot directory
OzoneConfiguration conf = createConfig(tempDir);
String ratisDir = SCMHAUtils.getSCMRatisDirectory(conf);
assertTrue(new File(ratisDir).mkdirs());

CertificateClient certClient = mock(CertificateClient.class);

// Test & Verify
IllegalStateException exception = assertThrows(
IllegalStateException.class,
() -> new SCMSnapshotProvider(conf, null, certClient));

assertTrue(exception.getMessage().contains("Ratis snapshot directory does not exist"));
}

/**
* Test constructor fails when Ratis storage directory does not exist.
*/
@Test
void testConstructorFailsWhenRatisDirMissing(@TempDir Path tempDir) {
// Setup: Don't create any directories
OzoneConfiguration conf = createConfig(tempDir);
CertificateClient certClient = mock(CertificateClient.class);

// Test & Verify
IllegalStateException exception = assertThrows(
IllegalStateException.class,
() -> new SCMSnapshotProvider(conf, null, certClient));

assertTrue(exception.getMessage().contains("Ratis storage directory does not exist"));
}

/**
* Helper method to create OzoneConfiguration with temp directories.
*/
private OzoneConfiguration createConfig(Path tempDir) {
OzoneConfiguration conf = new OzoneConfiguration();

String metadataDir = tempDir.resolve("metadata").toString();
String ratisStorageDir = tempDir.resolve("ratis").toString();
String ratisSnapshotDir = tempDir.resolve("ratis-snapshot").toString();

conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, metadataDir);
conf.set(ScmConfigKeys.OZONE_SCM_HA_RATIS_STORAGE_DIR, ratisStorageDir);
conf.set(ScmConfigKeys.OZONE_SCM_HA_RATIS_SNAPSHOT_DIR, ratisSnapshotDir);

return conf;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ public void testStartupSlvLessThanMlv(@TempDir Path tempDir)
File scmSubdir = tempDir.resolve("scm").resolve("current").toFile();
assertTrue(scmSubdir.mkdirs());

// Create Ratis directories to simulate a realistic downgrade scenario
// where SCM was previously running with a newer version
File ratisDir = tempDir.resolve("scm.ratis").toFile();
File snapshotDir = tempDir.resolve("scm.ratis.snapshot").toFile();
assertTrue(ratisDir.mkdirs());
assertTrue(snapshotDir.mkdirs());

OzoneConfiguration conf = new OzoneConfiguration();
conf.set(ScmConfigKeys.OZONE_SCM_DB_DIRS,
tempDir.toAbsolutePath().toString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
import org.apache.hadoop.hdds.scm.ha.RatisUtil;
import org.apache.hadoop.hdds.scm.ha.SCMContext;
import org.apache.hadoop.hdds.scm.ha.SCMHANodeDetails;
import org.apache.hadoop.hdds.scm.ha.SCMHAUtils;
import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl;
import org.apache.hadoop.hdds.scm.node.DatanodeInfo;
Expand Down Expand Up @@ -603,17 +602,11 @@ public void testScmInfo(@TempDir Path tempDir) throws Exception {
Path scmPath = tempDir.resolve("scm-meta");

conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, scmPath.toString());
SCMStorageConfig scmStore = new SCMStorageConfig(conf);
String clusterId = UUID.randomUUID().toString();
String scmId = UUID.randomUUID().toString();
scmStore.setClusterId(clusterId);
scmStore.setScmId(scmId);
scmStore.setSCMHAFlag(true);
// writes the version file properties
scmStore.initialize();
SCMRatisServerImpl.initialize(clusterId, scmId,
SCMHANodeDetails.loadSCMHAConfig(conf, scmStore)
.getLocalNodeDetails(), conf);
// Use scmInit to initialize SCM properly (creates all required directories)
StorageContainerManager.scmInit(conf, clusterId);
SCMStorageConfig scmStore = new SCMStorageConfig(conf);
String scmId = scmStore.getScmId();
StorageContainerManager scm = HddsTestUtils.getScmSimple(conf);
try {
scm.start();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,6 @@
import org.apache.hadoop.hdds.scm.ScmConfig;
import org.apache.hadoop.hdds.scm.ScmInfo;
import org.apache.hadoop.hdds.scm.client.ScmTopologyClient;
import org.apache.hadoop.hdds.scm.ha.HASecurityUtils;
import org.apache.hadoop.hdds.scm.ha.SCMHANodeDetails;
import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl;
import org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol;
import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol;
import org.apache.hadoop.hdds.scm.server.SCMHTTPServerConfig;
Expand Down Expand Up @@ -444,18 +441,10 @@ private void initSCM() throws IOException {
Files.createDirectories(scmPath);
conf.set(OZONE_METADATA_DIRS, scmPath.toString());

// Use scmInit to properly initialize SCM with all required directories
StorageContainerManager.scmInit(conf, clusterId);
SCMStorageConfig scmStore = new SCMStorageConfig(conf);
scmStore.setClusterId(clusterId);
scmStore.setScmId(scmId);
scmStore.setSCMHAFlag(true);
HASecurityUtils.initializeSecurity(scmStore, conf,
InetAddress.getLocalHost().getHostName(), true);
scmStore.setPrimaryScmNodeId(scmId);
// writes the version file properties
scmStore.initialize();
SCMRatisServerImpl.initialize(clusterId, scmId,
SCMHANodeDetails.loadSCMHAConfig(conf, scmStore)
.getLocalNodeDetails(), conf);
scmId = scmStore.getScmId();

/*
* As all these processes run inside the same JVM, there are issues around
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import java.util.concurrent.TimeoutException;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.HddsUtils;
import org.apache.hadoop.hdds.annotation.InterfaceAudience;
import org.apache.hadoop.hdds.client.RatisReplicationConfig;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
Expand All @@ -48,6 +49,7 @@
import org.apache.hadoop.hdds.scm.HddsTestUtils;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdds.scm.ha.SCMHANodeDetails;
import org.apache.hadoop.hdds.scm.ha.SCMHAUtils;
import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl;
import org.apache.hadoop.hdds.scm.node.NodeStatus;
import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
Expand Down Expand Up @@ -621,6 +623,9 @@ protected void initializeScmStorage(SCMStorageConfig scmStore)
SCMHANodeDetails.loadSCMHAConfig(conf, scmStore)
.getLocalNodeDetails(), conf);

// Create Ratis snapshot directory for test environment
String snapshotDir = SCMHAUtils.getSCMRatisSnapshotDirectory(conf);
HddsUtils.createDir(snapshotDir);
}

void initializeOmStorage(OMStorage omStorage) throws IOException {
Expand Down