Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
7ebc81d
HA Agent check regardless of NFS
GabrielBrascher Jan 22, 2021
44a320a
Add KVM HA Client validation on HA execution flow
GabrielBrascher Jan 25, 2021
895421e
kvm: Add Agent Helper to investigate if a Host is truly down
wido Jan 22, 2021
f97100d
Integrating Agent HA Helper
GabrielBrascher Jan 27, 2021
2e8b563
Count number of VMs running on Define if a host is healthy by countin…
GabrielBrascher Jan 28, 2021
4bafec0
Add global settings configurations; enhance documentation; work on tests
GabrielBrascher Feb 11, 2021
fa998e1
Remove commented lines
GabrielBrascher Apr 29, 2021
ee4c28c
Consider Paused instances to count when a VM is being migrated and al…
GabrielBrascher Apr 30, 2021
6953b95
Final adjustments before calling for review
GabrielBrascher May 3, 2021
35449e8
Refactoring tests and adding small improvements
GabrielBrascher May 5, 2021
0506649
Fix missing parameter on log String.format
GabrielBrascher May 6, 2021
4f7936e
Update code addressing reviewers
GabrielBrascher May 11, 2021
6d59a81
Simplify code flow, enhance Python, address reviewers.
GabrielBrascher May 11, 2021
c37b39a
Create KvmHaHelper class. Enhance HA validations by checking if host …
GabrielBrascher May 19, 2021
b5191e8
Fix checkstyle
GabrielBrascher May 19, 2021
e4c11fa
Add tests and enhance HA flow via new KvmHaHelper
GabrielBrascher May 19, 2021
de8f68e
Fix isKvmHaWebserviceEnabled
GabrielBrascher May 19, 2021
7b4e086
Add test cases and config key
GabrielBrascher May 22, 2021
891a095
Change KvmHaAcceptedProblematicHostsRatio config key description
GabrielBrascher May 26, 2021
e2f20e3
Address reviewer enhancing log and 'if' conditional
GabrielBrascher Jun 2, 2021
10cd13b
Enhance Documentation & remove unnecessary Catch (them all)
GabrielBrascher Jun 21, 2021
839d13c
Fine adjustments
GabrielBrascher Jun 23, 2021
a9e9a8e
Re-check packaging
GabrielBrascher Jun 24, 2021
25d8a0b
Changes on centos7 packaging for agent-ha-helper
GabrielBrascher Jul 2, 2021
4ec91b5
Remove unnecessary line after rebasing and fixing conflicts with main
GabrielBrascher Jul 7, 2021
25ffd18
Remove nested IF in KvmHaHelper
GabrielBrascher Jul 7, 2021
9b519e2
Address review removing nested IF and enhancing KvmHaAgentClient.prep…
GabrielBrascher Jul 7, 2021
178a09c
change header
GabrielBrascher Jul 8, 2021
ddd8aba
Fix cloud.spec with permissions for cloudstack-agent-ha-helper
GabrielBrascher Jul 16, 2021
d760b55
Use Stream instead of For loop.
GabrielBrascher Jul 16, 2021
19b1884
Use Cluster scope for KvmHaAcceptedProblematicHostsRatio
GabrielBrascher Aug 11, 2021
a5d139c
Remove unused variable
GabrielBrascher Aug 12, 2021
955c65e
Enhance KVM HA checks via neighbour hosts.
GabrielBrascher Aug 16, 2021
89ff240
Enhance isKvmHaWebserviceEnabled
GabrielBrascher Aug 16, 2021
0e94c1d
Avoid issues in case of big cluster, and thus limit number of retrie…
GabrielBrascher Aug 16, 2021
8b5faa7
Port python HTTP client to HTTPS
GabrielBrascher Nov 19, 2021
95c58d9
Rebase against main branch, fix test errors after updating default por…
GabrielBrascher Jan 4, 2022
a1271d2
Add SSL & Authentication support into the agent-ha-helper.py
GabrielBrascher Jan 18, 2022
5918bef
Update KVM HA Client to match with KVM HA Helper HTTPS server.
GabrielBrascher Jan 21, 2022
10c8636
Update "check-neighbour" and client/server HTTPs with Auth
GabrielBrascher Mar 9, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add KVM HA Client validation on HA execution flow
  • Loading branch information
GabrielBrascher committed Mar 4, 2022
commit 44a320a31d92f47a49b122861ffefae6fc26c21c
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@
import com.cloud.host.Status;
import com.cloud.host.dao.HostDao;
import com.cloud.hypervisor.Hypervisor;
import com.cloud.hypervisor.kvm.resource.KvmAgentHaClient;
import com.cloud.resource.ResourceManager;
import com.cloud.storage.Storage.StoragePoolType;
import com.cloud.utils.component.AdapterBase;
import org.apache.cloudstack.ha.HAManager;
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
import org.apache.log4j.Logger;
import org.jetbrains.annotations.NotNull;

import javax.inject.Inject;
import java.util.List;
Expand Down Expand Up @@ -81,11 +81,19 @@ public Status isAgentAlive(Host agent) {
Status agentStatus = Status.Disconnected;
boolean hasNfs = isHostServedByNfsPool(agent);
if (hasNfs) {
s_logger.debug("Agent investigation was requested on host " + agent + ", checking agent status via NFS storage.");
agentStatus = checkAgentStatusViaNfs(agent);
s_logger.debug(String.format("Agent investigation was requested on host %s, agent status via NFS storage is %s.", agent, agentStatus));
} else {
s_logger.debug(
"Agent investigation was requested on host " + agent + ", but host has no NFS storage. Skipping investigation via NFS.");
s_logger.debug(String.format("Agent investigation was requested on host %s, but host has no NFS storage. Skipping investigation via NFS.", agent));
}

// Probe the KVM HA webserver on the host's private IP address; a running agent marks the host as Up.
KvmAgentHaClient kvmAgentHaClient = new KvmAgentHaClient(agent.getPrivateIpAddress());
boolean isKvmAgentRunning = kvmAgentHaClient.isKvmHaAgentRunning();
if (isKvmAgentRunning) {
    agentStatus = Status.Up;
    // The format string has a %s placeholder, so the agent must be passed; without it String.format throws.
    s_logger.debug(String.format("Checking agent %s status; KVM HA webserver is Running as expected.", agent));
} else {
    s_logger.warn(String.format("Checking agent %s status. Failed to check host status via KVM Agent HA webserver", agent));
}

return agentStatus;
Expand Down Expand Up @@ -119,7 +127,6 @@ private boolean hasNfsPoolClusterWideForHost(Host agent) {
return false;
}

@NotNull
private Status checkAgentStatusViaNfs(Host agent) {
Status hostStatus = null;
Status neighbourStatus = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,17 @@
package com.cloud.hypervisor.kvm.resource;

import com.cloud.utils.exception.CloudRuntimeException;
import com.google.gson.JsonParser;
import org.apache.cloudstack.utils.redfish.RedfishException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.log4j.Logger;
import org.jetbrains.annotations.Nullable;

import java.io.BufferedReader;
import java.io.IOException;
Expand All @@ -46,73 +50,102 @@ public class KvmAgentHaClient {

private static final Logger LOGGER = Logger.getLogger(KvmAgentHaClient.class);
private final static int WAIT_FOR_REQUEST_RETRY = 2;
private final static String VM_COUNT = "count";
private final static int ERROR_CODE = -1;
private final static String EXPECTED_HTTP_STATUS = "2XX";
private static final int MAX_REQUEST_RETRIES = 2;
private static final int DEFAULT_PORT = 8080;
private String agentIpAddress;
private int port;
private int requestMaxRetries = 0; //TODO

public KvmAgentHaClient(String agentIpAddress, int port) {
/**
* Instantiates a webclient that checks, via a webserver running on the KVM host, the VMs running
* @param agentIpAddress address of the KVM host running the webserver
*/
public KvmAgentHaClient(String agentIpAddress) {
this.agentIpAddress = agentIpAddress;
this.port = port;
}

/**
 * Reports whether {@link #countRunningVmsOnAgent()} produced a non-negative VM count.
 *
 * @return {@code false} when the count is negative, {@code true} otherwise
 */
public boolean isKvmHaAgentRunning() {
    return countRunningVmsOnAgent() >= 0;
}

/**
* TODO
* Returns the System ID. Used when sending Computer System requests (e.g. ComputerSystem.Reset request).
* Returns the number of VMs running on the KVM host according to libvirt.
*/
public String checkVmsRunningOnAgent() {
String url = String.format("http://%s:%d", agentIpAddress, port);
public int countRunningVmsOnAgent() {
String url = String.format("http://%s:%d", agentIpAddress, DEFAULT_PORT);
HttpResponse response = executeHttpRequest(url);

URIBuilder builder = null;
if (response == null)
return ERROR_CODE;

return Integer.valueOf(processHttpResponseIntoJson(response));
}

/**
* Executes a GET request for the given URL address.
*/
@Nullable
protected HttpResponse executeHttpRequest(String url) {
HttpGet httpReq = null;
try {
builder = new URIBuilder(url);
URIBuilder builder = new URIBuilder(url);
httpReq = new HttpGet(builder.build());
} catch (URISyntaxException e) {
throw new CloudRuntimeException(String.format("Failed to create URI for GET request [URL: %s] due to exception.", url), e);
LOGGER.error(String.format("Failed to create URI for GET request [URL: %s] due to exception.", url), e);
return null;
}

HttpClient client = HttpClientBuilder.create().build();

HttpResponse response = null;

try {
response = client.execute(httpReq);
} catch (IOException e) {
if (requestMaxRetries == 0) {
throw new CloudRuntimeException(String.format("Failed to execute HTTP %s request [URL: %s] due to exception %s.", httpReq.getMethod(), url, e), e);
if (MAX_REQUEST_RETRIES == 0) {
LOGGER.warn(String.format("Failed to execute HTTP %s request [URL: %s] due to exception %s.", httpReq.getMethod(), url, e), e);
return null;
}
retryHttpRequest(url, httpReq, client);
}

return processHttpResponseIntoJson(response);
return response;
}

/**
* TODO
* Re-executes the HTTP GET request until it gets a response or it reaches the maximum request retries (#MAX_REQUEST_RETRIES)
*/
protected HttpResponse retryHttpRequest(String url, HttpRequestBase httpReq, HttpClient client) {
LOGGER.warn(String.format("Failed to execute HTTP %s request [URL: %s]. Executing the request again.", httpReq.getMethod(), url));
HttpResponse response = null;
for (int attempt = 1; attempt < requestMaxRetries + 1; attempt++) {
for (int attempt = 1; attempt < MAX_REQUEST_RETRIES + 1; attempt++) {
try {
TimeUnit.SECONDS.sleep(WAIT_FOR_REQUEST_RETRY);
LOGGER.debug(String.format("Retry HTTP %s request [URL: %s], attempt %d/%d.", httpReq.getMethod(), url, attempt, requestMaxRetries));
LOGGER.debug(String.format("Retry HTTP %s request [URL: %s], attempt %d/%d.", httpReq.getMethod(), url, attempt, MAX_REQUEST_RETRIES));
response = client.execute(httpReq);
} catch (IOException | InterruptedException e) {
if (attempt == requestMaxRetries) {
throw new CloudRuntimeException(
String.format("Failed to execute HTTP %s request retry attempt %d/%d [URL: %s] due to exception %s", httpReq.getMethod(), attempt, requestMaxRetries,
if (attempt == MAX_REQUEST_RETRIES) {
LOGGER.error(
String.format("Failed to execute HTTP %s request retry attempt %d/%d [URL: %s] due to exception %s", httpReq.getMethod(), attempt, MAX_REQUEST_RETRIES,
url, e));
} else {
LOGGER.warn(
String.format("Failed to execute HTTP %s request retry attempt %d/%d [URL: %s] due to exception %s", httpReq.getMethod(), attempt, requestMaxRetries,
LOGGER.error(
String.format("Failed to execute HTTP %s request retry attempt %d/%d [URL: %s] due to exception %s", httpReq.getMethod(), attempt, MAX_REQUEST_RETRIES,
url, e));
}
}
}

if (response == null) {
throw new CloudRuntimeException(String.format("Failed to execute HTTP %s request [URL: %s].", httpReq.getMethod(), url));
LOGGER.error(String.format("Failed to execute HTTP %s request [URL: %s].", httpReq.getMethod(), url));
}

int statusCode = response.getStatusLine().getStatusCode();
if (statusCode < HttpStatus.SC_OK || statusCode >= HttpStatus.SC_MULTIPLE_CHOICES) {
throw new RedfishException(String.format("Failed to get VMs information with a %s request to URL '%s'. The expected HTTP status code is '%s' but it got '%s'.",
HttpGet.METHOD_NAME, url, EXPECTED_HTTP_STATUS, statusCode));
}

LOGGER.debug(String.format("Successfully executed HTTP %s request [URL: %s].", httpReq.getMethod(), url));
Expand All @@ -126,14 +159,19 @@ protected HttpResponse retryHttpRequest(String url, HttpRequestBase httpReq, Htt
/**
 * Reads the first line of the response body, parses it as a JSON object and extracts the value
 * stored under the {@code VM_COUNT} key.
 *
 * @param response HTTP response to process; may be {@code null}
 * @return the {@code VM_COUNT} value as a String, or {@code ERROR_CODE} as a String when there is
 *         no response or the body has no line to read
 * @throws CloudRuntimeException if the response content cannot be obtained or read
 */
protected String processHttpResponseIntoJson(HttpResponse response) {
    if (response == null) {
        return Integer.toString(ERROR_CODE);
    }

    String jsonString;
    // try-with-resources closes the reader (and the wrapped content stream) once the line is read.
    try (BufferedReader streamReader = new BufferedReader(new InputStreamReader(response.getEntity().getContent(), StandardCharsets.UTF_8))) {
        jsonString = streamReader.readLine();
    } catch (UnsupportedOperationException | IOException e) {
        throw new CloudRuntimeException("Failed to process response", e);
    }

    if (jsonString == null) {
        // readLine() returns null for an empty body; report it like a missing response instead of failing in the parser.
        return Integer.toString(ERROR_CODE);
    }

    return new JsonParser().parse(jsonString).getAsJsonObject().get(VM_COUNT).getAsString();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
import com.cloud.host.HostVO;
import com.cloud.host.Status;
import com.cloud.hypervisor.Hypervisor;
import com.cloud.hypervisor.kvm.resource.KvmAgentHaClient;
import com.cloud.resource.ResourceManager;
import com.cloud.storage.Storage;
import com.cloud.storage.StorageManager;
import com.cloud.storage.StoragePool;
import com.cloud.storage.Volume;
Expand Down Expand Up @@ -81,7 +83,29 @@ public boolean isActive(Host r, DateTime suspectTime) throws HACheckerException

/**
 * Decides whether the host is healthy: first through the NFS-based check and, only when that
 * does not report healthy, by probing the KVM HA agent on the host's private IP address.
 *
 * @param r host under investigation
 * @return {@code true} if either check reports the host as healthy
 */
@Override
public boolean isHealthy(Host r) {
    HashMap<StoragePool, List<Volume>> poolVolMap = getVolumeUuidOnHost(r);
    if (isHealthyCheckViaNfs(r, false, poolVolMap)) {
        // Already healthy via NFS; the agent probe could not change the outcome, so skip the HTTP request.
        return true;
    }

    KvmAgentHaClient kvmAgentHaClient = new KvmAgentHaClient(r.getPrivateIpAddress());
    return kvmAgentHaClient.isKvmHaAgentRunning();
}

/**
 * Runs the agent-activity check when at least one of the given storage pools is NFS-backed.
 *
 * @param r host under investigation
 * @param isHealthy value returned unchanged when none of the pools is NFS-backed
 * @param poolVolMap storage pools mapped to their volumes; only the pool types are inspected here
 * @return the result of {@code isAgentActive(r)} if any pool is of type NetworkFilesystem or ManagedNFS,
 *         otherwise {@code isHealthy}
 */
private boolean isHealthyCheckViaNfs(Host r, boolean isHealthy, HashMap<StoragePool, List<Volume>> poolVolMap) {
    for (StoragePool pool : poolVolMap.keySet()) {
        Storage.StoragePoolType poolType = pool.getPoolType();
        if (Storage.StoragePoolType.NetworkFilesystem == poolType || Storage.StoragePoolType.ManagedNFS == poolType) {
            // isAgentActive only takes the host, so one call covers every NFS pool.
            return isAgentActive(r);
        }
    }
    return isHealthy;
}

private boolean isAgentActive(Host agent) {
Expand Down Expand Up @@ -151,19 +175,33 @@ private boolean isVMActivtyOnHost(Host agent, DateTime suspectTime) throws HAChe
if (agent.getHypervisorType() != Hypervisor.HypervisorType.KVM && agent.getHypervisorType() != Hypervisor.HypervisorType.LXC) {
throw new IllegalStateException(String.format("Calling KVM investigator for non KVM Host of type [%s].", agent.getHypervisorType()));
}
boolean activityStatus = true;
boolean activityStatus = false;
HashMap<StoragePool, List<Volume>> poolVolMap = getVolumeUuidOnHost(agent);
for (StoragePool pool : poolVolMap.keySet()) {
activityStatus = verifyActivityOfStorageOnHost(poolVolMap, pool, agent, suspectTime, activityStatus);
if (!activityStatus) {
LOG.warn(String.format("It seems that the storage pool [%s] does not have activity on %s.", pool.getId(), agent.toString()));
break;
if(Storage.StoragePoolType.NetworkFilesystem == pool.getPoolType()
|| Storage.StoragePoolType.ManagedNFS == pool.getPoolType()
|| Storage.StoragePoolType.ManagedNFS == pool.getPoolType()) {
activityStatus = verifyActivityOfStorageOnHost(poolVolMap, pool, agent, suspectTime, activityStatus);
if (!activityStatus) {
LOG.warn(String.format("It seems that the storage pool [%s] does not have activity on %s.", pool.getId(), agent.toString()));
break;
}
}
}

if (!activityStatus) {
    // No activity was established so far: fall back to probing the KVM HA agent on the host.
    KvmAgentHaClient kvmAgentHaClient = new KvmAgentHaClient(agent.getPrivateIpAddress());
    if (kvmAgentHaClient.isKvmHaAgentRunning()) {
        activityStatus = true;
    } else {
        // Warn only when both checks failed; previously this was also logged when activity had been detected.
        LOG.warn(String.format("No VM activity detected on %s. This might trigger HA Host Recovery and/or Fence.", agent.toString()));
    }
}

return activityStatus;
}


protected boolean verifyActivityOfStorageOnHost(HashMap<StoragePool, List<Volume>> poolVolMap, StoragePool pool, Host agent, DateTime suspectTime, boolean activityStatus) throws HACheckerException, IllegalStateException {
List<Volume> volume_list = poolVolMap.get(pool);
final CheckVMActivityOnStoragePoolCommand cmd = new CheckVMActivityOnStoragePoolCommand(agent, pool, volume_list, suspectTime);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,35 @@
*/
package com.cloud.hypervisor.kvm.resource;

import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.junit.MockitoJUnitRunner;
import org.apache.http.StatusLine;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.mockito.Mockito;

@RunWith(MockitoJUnitRunner.class)
//@RunWith(MockitoJUnitRunner.class)
public class KvmAgentHaClientTest {

private static final String AGENT_ADDRESS = "kvm-agent.domain.name";

private KvmAgentHaClient kvmAgentHaClient = Mockito.spy(new KvmAgentHaClient(AGENT_ADDRESS));

//TODO
@Test
// @test
public void checkHostStatusTest() {
KvmAgentHaClient kvmAgentHaClient = new KvmAgentHaClient("host", 8080);
System.out.println(kvmAgentHaClient.checkVmsRunningOnAgent());
int kvmAgentResponse = kvmAgentHaClient.countRunningVmsOnAgent();
}

// Disabled placeholder: the @Test annotation below is commented out, so this method is not run as a test.
// @Test
public void isKvmHaAgentRunningTest() {
// NOTE(review): the result is stored but never asserted; presumably waiting for the HTTP call to be mocked — confirm before enabling.
boolean isKvmAgentRunning = kvmAgentHaClient.isKvmHaAgentRunning();
}

/**
 * Creates a mocked HTTP response whose status line reports the given status code, and stubs the
 * spied client so that executeHttpRequest returns that response for any URL.
 *
 * @param httpStatusCode status code the mocked status line returns
 * @return the mocked response
 */
private CloseableHttpResponse mockResponse(int httpStatusCode) {
    StatusLine mockedStatusLine = Mockito.mock(StatusLine.class);
    CloseableHttpResponse mockedResponse = Mockito.mock(CloseableHttpResponse.class);

    Mockito.when(mockedStatusLine.getStatusCode()).thenReturn(httpStatusCode);
    Mockito.when(mockedResponse.getStatusLine()).thenReturn(mockedStatusLine);
    // doReturn(...).when(spy) keeps the real executeHttpRequest from being invoked while stubbing.
    Mockito.doReturn(mockedResponse).when(kvmAgentHaClient).executeHttpRequest(Mockito.anyString());

    return mockedResponse;
}

}