Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion agent/src/main/java/com/cloud/agent/Agent.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@

import javax.naming.ConfigurationException;

import com.cloud.resource.AgentStatusUpdater;
import com.cloud.resource.ResourceStatusUpdater;
import com.cloud.utils.NumbersUtil;
import org.apache.cloudstack.agent.lb.SetupMSListAnswer;
import org.apache.cloudstack.agent.lb.SetupMSListCommand;
Expand Down Expand Up @@ -100,7 +102,7 @@
* For more configuration options, see the individual types.
*
**/
public class Agent implements HandlerFactory, IAgentControl {
public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater {
protected static Logger s_logger = Logger.getLogger(Agent.class);

public enum ExitStatus {
Expand Down Expand Up @@ -409,6 +411,20 @@ public void scheduleWatch(final Link link, final Request request, final long del
}
}

public void triggerUpdate() {
PingCommand command = _resource.getCurrentStatus(getId());
command.setOutOfBand(true);
s_logger.debug("Sending out of band ping");

final Request request = new Request(_id, -1, command, false);
request.setSequence(getNextSequence());
try {
_link.send(request.toBytes());
} catch (final ClosedChannelException e) {
s_logger.warn("Unable to send ping update: " + request.toString());
}
}

protected void cancelTasks() {
synchronized (_watchList) {
for (final WatchTask task : _watchList) {
Expand Down Expand Up @@ -461,6 +477,10 @@ public void sendStartup(final Link link) {
} catch (final ClosedChannelException e) {
s_logger.warn("Unable to send request: " + request.toString());
}

if (_resource instanceof ResourceStatusUpdater) {
((ResourceStatusUpdater) _resource).registerStatusUpdater(this);
}
}
}

Expand Down
11 changes: 11 additions & 0 deletions core/src/main/java/com/cloud/agent/api/PingCommand.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
public class PingCommand extends Command {
Host.Type hostType;
long hostId;
boolean outOfBand;

protected PingCommand() {
}
Expand All @@ -33,6 +34,12 @@ public PingCommand(Host.Type type, long id) {
hostId = id;
}

public PingCommand(Host.Type type, long id, boolean oob) {
hostType = type;
hostId = id;
outOfBand = oob;
}

public Host.Type getHostType() {
return hostType;
}
Expand All @@ -41,6 +48,10 @@ public long getHostId() {
return hostId;
}

public boolean getOutOfBand() { return outOfBand; }

public void setOutOfBand(boolean oob) { this.outOfBand = oob; }

@Override
public boolean executeInSequence() {
return false;
Expand Down
27 changes: 27 additions & 0 deletions core/src/main/java/com/cloud/resource/AgentStatusUpdater.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.resource;

/**
* AgentStatusUpdater is an agent with triggerable update functionality
*/
public interface AgentStatusUpdater {
/**
* Trigger the sending of an update (Ping).
*/
void triggerUpdate();
}
29 changes: 29 additions & 0 deletions core/src/main/java/com/cloud/resource/ResourceStatusUpdater.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.resource;

/**
* ResourceStatusUpdater is a resource that can trigger out of band status updates
*/
public interface ResourceStatusUpdater {
/**
* Register an AgentStatusUpdater to use for triggering out of band updates.
*
* @param updater The object to call triggerUpdate() on
*/
void registerStatusUpdater(AgentStatusUpdater updater);
}
Original file line number Diff line number Diff line change
Expand Up @@ -3726,7 +3726,7 @@ public boolean processCommands(final long agentId, final long seq, final Command
if (cmd instanceof PingRoutingCommand) {
final PingRoutingCommand ping = (PingRoutingCommand)cmd;
if (ping.getHostVmStateReport() != null) {
_syncMgr.processHostVmStatePingReport(agentId, ping.getHostVmStateReport());
_syncMgr.processHostVmStatePingReport(agentId, ping.getHostVmStateReport(), ping.getOutOfBand());
}

scanStalledVMInTransitionStateOnUpHost(agentId);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public interface VirtualMachinePowerStateSync {
void processHostVmStateReport(long hostId, Map<String, HostVmStateReportEntry> report);

// to adapt legacy ping report
void processHostVmStatePingReport(long hostId, Map<String, HostVmStateReportEntry> report);
void processHostVmStatePingReport(long hostId, Map<String, HostVmStateReportEntry> report, boolean force);

Map<Long, VirtualMachine.PowerState> convertVmStateReport(Map<String, HostVmStateReportEntry> states);
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,19 +55,19 @@ public void processHostVmStateReport(long hostId, Map<String, HostVmStateReportE
s_logger.debug("Process host VM state report. host: " + hostId);

Map<Long, VirtualMachine.PowerState> translatedInfo = convertVmStateReport(report);
processReport(hostId, translatedInfo);
processReport(hostId, translatedInfo, false);
}

@Override
public void processHostVmStatePingReport(long hostId, Map<String, HostVmStateReportEntry> report) {
public void processHostVmStatePingReport(long hostId, Map<String, HostVmStateReportEntry> report, boolean force) {
if (s_logger.isDebugEnabled())
s_logger.debug("Process host VM state report from ping process. host: " + hostId);

Map<Long, VirtualMachine.PowerState> translatedInfo = convertVmStateReport(report);
processReport(hostId, translatedInfo);
processReport(hostId, translatedInfo, force);
}

private void processReport(long hostId, Map<Long, VirtualMachine.PowerState> translatedInfo) {
private void processReport(long hostId, Map<Long, VirtualMachine.PowerState> translatedInfo, boolean force) {

if (s_logger.isDebugEnabled()) {
s_logger.debug("Process VM state report. host: " + hostId + ", number of records in report: " + translatedInfo.size());
Expand Down Expand Up @@ -117,7 +117,7 @@ private void processReport(long hostId, Map<Long, VirtualMachine.PowerState> tra

// Make sure powerState is up to date for missing VMs
try {
if (!_instanceDao.isPowerStateUpToDate(instance.getId())) {
if (!force && !_instanceDao.isPowerStateUpToDate(instance.getId())) {
s_logger.warn("Detected missing VM but power state is outdated, wait for another process report run for VM id: " + instance.getId());
_instanceDao.resetVmPowerStateTracking(instance.getId());
continue;
Expand Down Expand Up @@ -150,7 +150,7 @@ private void processReport(long hostId, Map<Long, VirtualMachine.PowerState> tra

long milliSecondsSinceLastStateUpdate = currentTime.getTime() - vmStateUpdateTime.getTime();

if (milliSecondsSinceLastStateUpdate > milliSecondsGracefullPeriod) {
if (force || milliSecondsSinceLastStateUpdate > milliSecondsGracefullPeriod) {
s_logger.debug("vm id: " + instance.getId() + " - time since last state update(" + milliSecondsSinceLastStateUpdate + "ms) has passed graceful period");

// this is were a race condition might have happened if we don't re-fetch the instance;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,20 +83,25 @@
import org.libvirt.DomainInfo.DomainState;
import org.libvirt.DomainInterfaceStats;
import org.libvirt.DomainSnapshot;
import org.libvirt.Library;
import org.libvirt.LibvirtException;
import org.libvirt.MemoryStatistic;
import org.libvirt.Network;
import org.libvirt.SchedParameter;
import org.libvirt.SchedUlongParameter;
import org.libvirt.Secret;
import org.libvirt.VcpuInfo;
import org.libvirt.event.DomainEvent;
import org.libvirt.event.DomainEventDetail;
import org.libvirt.event.StoppedDetail;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;


import com.cloud.agent.api.Answer;
import com.cloud.agent.api.Command;
import com.cloud.agent.api.HostVmStateReportEntry;
Expand Down Expand Up @@ -175,6 +180,8 @@
import com.cloud.network.Networks.IsolationType;
import com.cloud.network.Networks.RouterPrivateIpStrategy;
import com.cloud.network.Networks.TrafficType;
import com.cloud.resource.AgentStatusUpdater;
import com.cloud.resource.ResourceStatusUpdater;
import com.cloud.resource.RequestWrapper;
import com.cloud.resource.ServerResource;
import com.cloud.resource.ServerResourceBase;
Expand Down Expand Up @@ -224,11 +231,12 @@
* private mac addresses for domrs | mac address | start + 126 || ||
* pool | the parent of the storage pool hierarchy * }
**/
public class LibvirtComputingResource extends ServerResourceBase implements ServerResource, VirtualRouterDeployer {
public class LibvirtComputingResource extends ServerResourceBase implements ServerResource, VirtualRouterDeployer, ResourceStatusUpdater {
protected static Logger s_logger = Logger.getLogger(LibvirtComputingResource.class);

private static final String CONFIG_VALUES_SEPARATOR = ",";


private static final String LEGACY = "legacy";
private static final String SECURE = "secure";

Expand Down Expand Up @@ -457,6 +465,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
protected CPUStat _cpuStat = new CPUStat();
protected MemStat _memStat = new MemStat(_dom0MinMem, _dom0OvercommitMem);
private final LibvirtUtilitiesHelper libvirtUtilitiesHelper = new LibvirtUtilitiesHelper();
private AgentStatusUpdater _agentStatusUpdater;

protected Boolean enableManuallySettingCpuTopologyOnKvmVm = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.ENABLE_MANUALLY_SETTING_CPU_TOPOLOGY_ON_KVM_VM);

Expand All @@ -481,6 +490,11 @@ protected long getHypervisorQemuVersion() {
return _hypervisorQemuVersion;
}

@Override
public void registerStatusUpdater(AgentStatusUpdater updater) {
_agentStatusUpdater = updater;
}

@Override
public ExecutionResult executeInVR(final String routerIp, final String script, final String args) {
return executeInVR(routerIp, script, args, _timeout);
Expand Down Expand Up @@ -3590,9 +3604,63 @@ private StartupStorageCommand createLocalStoragePool(String localStoragePath, St
} catch (final CloudRuntimeException e) {
s_logger.debug("Unable to initialize local storage pool: " + e);
}
setupLibvirtEventListener();
return sscmd;
}

private void setupLibvirtEventListener() {
final Thread libvirtListenerThread = new Thread(() -> {
try {
Library.runEventLoop();
} catch (LibvirtException e) {
s_logger.error("LibvirtException was thrown in event loop: ", e);
} catch (InterruptedException e) {
s_logger.error("Libvirt event loop was interrupted: ", e);
}
});

try {
libvirtListenerThread.setDaemon(true);
libvirtListenerThread.start();

Connect conn = LibvirtConnection.getConnection();
conn.addLifecycleListener(this::onDomainLifecycleChange);

s_logger.debug("Set up the libvirt domain event lifecycle listener");
} catch (LibvirtException e) {
s_logger.error("Failed to get libvirt connection for domain event lifecycle", e);
}
}

private int onDomainLifecycleChange(Domain domain, DomainEvent domainEvent) {
try {
s_logger.debug(String.format("Got event lifecycle change on Domain %s, event %s", domain.getName(), domainEvent));
if (domainEvent != null) {
switch (domainEvent.getType()) {
case STOPPED:
/* libvirt-destroyed VMs have detail StoppedDetail.DESTROYED, self shutdown guests are StoppedDetail.SHUTDOWN
* Checking for this helps us differentiate between events where cloudstack or admin stopped the VM vs guest
* initiated, and avoid pushing extra updates for actions we are initiating without a need for extra tracking */
DomainEventDetail detail = domainEvent.getDetail();
if (StoppedDetail.SHUTDOWN.equals(detail) || StoppedDetail.CRASHED.equals(detail)) {
s_logger.info("Triggering out of band status update due to completed self-shutdown or crash of VM");
_agentStatusUpdater.triggerUpdate();
} else {
s_logger.debug("Event detail: " + detail);
}
break;
default:
s_logger.debug(String.format("No handling for event %s", domainEvent));
}
}
} catch (LibvirtException e) {
s_logger.error("Libvirt exception while processing lifecycle event", e);
} catch (Throwable e) {
s_logger.error("Error during lifecycle", e);
}
return 0;
}

public String diskUuidToSerial(String uuid) {
String uuidWithoutHyphen = uuid.replace("-","");
return uuidWithoutHyphen.substring(0, Math.min(uuidWithoutHyphen.length(), 20));
Expand Down