rebuild:
Reset a virtual machine to its initial state, or rebuild it from a different image.
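For orientation, a rebuild is triggered by POSTing a rebuild action to the server. Below is a minimal sketch using the requests library; the endpoint, token, and IDs are placeholders, not values from this deployment:

import requests

# Placeholders; substitute values from your own deployment.
token = "..."
server_id = "..."
image_id = "70a599e0-31e7-49b7-b260-868f441e862b"

body = {
    "rebuild": {
        "imageRef": image_id,       # image to rebuild from
        "adminPass": "new-secret",  # optional new admin password
        "name": "rebuilt-server",   # optional new display name
    }
}
resp = requests.post(
    "http://nova-api:8774/v2.1/servers/%s/action" % server_id,
    json=body,
    headers={"X-Auth-Token": token,
             "X-OpenStack-Nova-API-Version": "2.19"})
resp.raise_for_status()  # 409 Conflict if the instance is locked or in an invalid state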
1. API
The API entry point is in nova/api/openstack/compute/servers.py:
@wsgi.action('rebuild')
@validation.schema(schema_server_rebuild_v20, '2.0', '2.0')
@validation.schema(schema_server_rebuild, '2.1', '2.18')
@validation.schema(schema_server_rebuild_v219, '2.19')
def _action_rebuild(self, req, id, body):
    """Rebuild an instance with the given attributes."""
    rebuild_dict = body['rebuild']
    # Image to rebuild from
    image_href = rebuild_dict["imageRef"]
    # Admin password for the rebuilt instance
    password = self._get_server_admin_password(rebuild_dict)
    # Look up the instance and check the rebuild policy
    context = req.environ['nova.context']
    instance = self._get_server(context, req, id)
    context.can(server_policies.SERVERS % 'rebuild',
                target={'user_id': instance.user_id,
                        'project_id': instance.project_id})
    attr_map = {
        'name': 'display_name',
        'description': 'display_description',
        'metadata': 'metadata',
    }
    kwargs = {}
    helpers.translate_attributes(helpers.REBUILD, rebuild_dict, kwargs)
    for request_attribute, instance_attribute in attr_map.items():
        try:
            if request_attribute == 'name':
                kwargs[instance_attribute] = common.normalize_name(
                    rebuild_dict[request_attribute])
            else:
                kwargs[instance_attribute] = rebuild_dict[
                    request_attribute]
        except (KeyError, TypeError):
            pass
    try:
        # Rebuild
        self.compute_api.rebuild(context,
                                 instance,
                                 image_href,
                                 password,
                                 **kwargs)
    except exception.InstanceIsLocked as e:
        raise exc.HTTPConflict(explanation=e.format_message())
    except exception.InstanceInvalidState as state_error:
        common.raise_http_conflict_for_instance_invalid_state(state_error,
                'rebuild', id)
    ......
    instance = self._get_server(context, req, id, is_detail=True)
    view = self._view_builder.show(req, instance, extend_address=False)
    # Add on the admin_password attribute since the view doesn't do it
    # unless instance passwords are disabled
    if CONF.api.enable_instance_password:
        view['server']['adminPass'] = password
    robj = wsgi.ResponseObject(view)
    return self._add_location(robj)
2. compute rebuild
compute_api.rebuild is defined in nova/compute/api.py.
It is a long function, so we walk through it in pieces.
@check_instance_lock
@check_instance_cell
# Check the VM state; rebuild is only allowed in these states
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
                                vm_states.ERROR])
def rebuild(self, context, instance, image_href, admin_password,
            files_to_inject=None, **kwargs):
    # Extract and validate the parameters
    files_to_inject = files_to_inject or []
    metadata = kwargs.get('metadata', {})
    preserve_ephemeral = kwargs.get('preserve_ephemeral', False)
    auto_disk_config = kwargs.get('auto_disk_config')
    image_id, image = self._get_image(context, image_href)
    self._check_auto_disk_config(image=image, **kwargs)
    flavor = instance.get_flavor()
    # Fetch the block device mappings for the instance
    bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
        context, instance.uuid)
    root_bdm = compute_utils.get_root_bdm(context, instance, bdms)
    # Check to see if the image is changing and we have a volume-backed
    # server.
    is_volume_backed = compute_utils.is_volume_backed_instance(
        context, instance, bdms)
    if is_volume_backed:
        # For boot from volume, instance.image_ref is empty, so we need to
        # query the image from the volume.
        if root_bdm is None:
            # This shouldn't happen and is an error, we need to fail. This
            # is not the user's fault, it's an internal error. Without a
            # root BDM we have no way of knowing the backing volume (or
            # image in that volume) for this instance.
            raise exception.NovaException(
                _('Unable to find root block device mapping for '
                  'volume-backed instance.'))
        volume = self.volume_api.get(context, root_bdm.volume_id)
        volume_image_metadata = volume.get('volume_image_metadata', {})
        orig_image_ref = volume_image_metadata.get('image_id')
    else:
        orig_image_ref = instance.image_ref
    # Validate the parameters, including the files to inject
    self._checks_for_create_and_rebuild(context, image_id, image,
                                        flavor, metadata, files_to_inject,
                                        root_bdm)
    kernel_id, ramdisk_id = self._handle_kernel_and_ramdisk(
        context, None, None, image)
    # Reset the image metadata stored on the instance, removing stale entries
    def _reset_image_metadata():
        """Remove old image properties that we're storing as instance
        system metadata. These properties start with 'image_'.
        Then add the properties for the new image.
        """
        # FIXME(comstud): There's a race condition here in that if
        # the system_metadata for this instance is updated after
        # we do the previous save() and before we update.. those
        # other updates will be lost. Since this problem exists in
        # a lot of other places, I think it should be addressed in
        # a DB layer overhaul.
        orig_sys_metadata = dict(instance.system_metadata)
        # Remove the old keys
        for key in list(instance.system_metadata.keys()):
            if key.startswith(utils.SM_IMAGE_PROP_PREFIX):
                del instance.system_metadata[key]
        # Add the new ones
        new_sys_metadata = utils.get_system_metadata_from_image(
            image, flavor)
        instance.system_metadata.update(new_sys_metadata)
        instance.save()
        return orig_sys_metadata
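To make the key swap concrete, here is a toy illustration with made-up values (in Nova, utils.SM_IMAGE_PROP_PREFIX is 'image_'):

# Toy illustration of _reset_image_metadata: drop old image_* keys,
# then merge in the new image's properties.
system_metadata = {
    'image_os_distro': 'centos',      # from the old image
    'image_min_ram': '512',           # from the old image
    'instance_type_name': 'm1.small'  # unrelated key, kept
}
for key in list(system_metadata):
    if key.startswith('image_'):
        del system_metadata[key]
system_metadata.update({'image_os_distro': 'ubuntu',
                        'image_min_ram': '1024'})
# system_metadata now describes the new image only.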
    # Update the instance state.
    # Since image might have changed, we may have new values for
    # os_type, vm_mode, etc
    options_from_image = self._inherit_properties_from_image(
        image, auto_disk_config)
    instance.update(options_from_image)
    instance.task_state = task_states.REBUILDING
    instance.image_ref = image_href
    instance.kernel_id = kernel_id or ""
    instance.ramdisk_id = ramdisk_id or ""
    instance.progress = 0
    instance.update(kwargs)
    instance.save(expected_task_state=[None])
    orig_sys_metadata = _reset_image_metadata()
    self._record_action_start(context, instance, instance_actions.REBUILD)
    # Fetch, and possibly adjust, the request_spec used for scheduling
    host = instance.host
    try:
        request_spec = objects.RequestSpec.get_by_instance_uuid(
            context, instance.uuid)
        # If a new image is provided on rebuild, we will need to run
        # through the scheduler again, but we want the instance to be
        # rebuilt on the same host it's already on.
        if orig_image_ref != image_href:
            # We have to modify the request spec that goes to the scheduler
            # to contain the new image. We persist this since we've already
            # changed the instance.image_ref above so we're being
            # consistent.
            request_spec.image = objects.ImageMeta.from_dict(image)
            request_spec.save()
            if 'scheduler_hints' not in request_spec:
                request_spec.scheduler_hints = {}
            # Nuke the id on this so we can't accidentally save
            # this hint hack later
            del request_spec.id
            # NOTE(danms): Passing host=None tells conductor to
            # call the scheduler. The _nova_check_type hint
            # requires that the scheduler returns only the same
            # host that we are currently on and only checks
            # rebuild-related filters.
            # When the image changes, one more scheduling pass runs,
            # but it is forced back onto the current host.
            request_spec.scheduler_hints['_nova_check_type'] = ['rebuild']
            request_spec.force_hosts = [instance.host]
            request_spec.force_nodes = [instance.node]
            host = None
    except exception.RequestSpecNotFound:
        # Some old instances can still have no RequestSpec object attached
        # to them, we need to support the old way
        request_spec = None
    # Hand off the rebuild to the conductor
    self.compute_task_api.rebuild_instance(context, instance=instance,
        new_pass=admin_password, injected_files=files_to_inject,
        image_ref=image_href, orig_image_ref=orig_image_ref,
        orig_sys_metadata=orig_sys_metadata, bdms=bdms,
        preserve_ephemeral=preserve_ephemeral, host=host,
        request_spec=request_spec,
        kwargs=kwargs)
This ends up calling rebuild_instance on the conductor.
3. conductor rebuild
rebuild_instance is defined in nova/conductor/manager.py:
def rebuild_instance(self, context, instance, orig_image_ref, image_ref,
                     injected_files, new_pass, orig_sys_metadata,
                     bdms, recreate, on_shared_storage,
                     preserve_ephemeral=False, host=None,
                     request_spec=None):
    with compute_utils.EventReporter(context, 'rebuild_server',
                                     instance.uuid):
        node = limits = None
        try:
            migration = objects.Migration.get_by_instance_and_status(
                context, instance.uuid, 'accepted')
        except exception.MigrationNotFoundByStatus:
            LOG.debug("No migration record for the rebuild/evacuate "
                      "request.", instance=instance)
            migration = None
        # If no host was given, schedule one
        if not host:
            if not request_spec:
                image_meta = nova_object.obj_to_primitive(
                    instance.image_meta)
                request_spec = scheduler_utils.build_request_spec(
                    context, image_meta, [instance])
            elif recreate:
                request_spec.ignore_hosts = request_spec.ignore_hosts or []
                request_spec.ignore_hosts.append(instance.host)
                request_spec.reset_forced_destinations()
                filter_properties = request_spec.\
                    to_legacy_filter_properties_dict()
                request_spec = request_spec.to_legacy_request_spec_dict()
            else:
                filter_properties = request_spec.\
                    to_legacy_filter_properties_dict()
                request_spec = request_spec.to_legacy_request_spec_dict()
            try:
                # Schedule a host; force_hosts was set earlier, so the
                # instance still lands on its original host
                hosts = self._schedule_instances(
                    context, request_spec, filter_properties)
                host_dict = hosts.pop(0)
                host, node, limits = (host_dict['host'],
                                      host_dict['nodename'],
                                      host_dict['limits'])
            except exception.NoValidHost as ex:
                ......
        compute_utils.notify_about_instance_usage(
            self.notifier, context, instance, "rebuild.scheduled")
        # Call rebuild_instance on the target host
        self.compute_rpcapi.rebuild_instance(context,
                instance=instance,
                new_pass=new_pass,
                injected_files=injected_files,
                image_ref=image_ref,
                orig_image_ref=orig_image_ref,
                orig_sys_metadata=orig_sys_metadata,
                bdms=bdms,
                recreate=recreate,
                on_shared_storage=on_shared_storage,
                preserve_ephemeral=preserve_ephemeral,
                migration=migration,
                host=host, node=node, limits=limits)
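Note the contrast between the two paths that reach this code. A condensed sketch of how each shapes the RequestSpec, drawn from the excerpts above:

# Rebuild with a new image (compute API): pin scheduling to the current host.
request_spec.scheduler_hints['_nova_check_type'] = ['rebuild']
request_spec.force_hosts = [instance.host]
# Evacuate (recreate=True, conductor): exclude the current, failed host.
request_spec.ignore_hosts.append(instance.host)
request_spec.reset_forced_destinations()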
4. compute rebuild_instance
The compute_rpcapi.rebuild_instance RPC call is handled by rebuild_instance in nova/compute/manager.py.
Definition:
@wrap_instance_event(prefix='compute')
@wrap_instance_fault
def rebuild_instance(self, context, instance, orig_image_ref, image_ref,
                     injected_files, new_pass, orig_sys_metadata,
                     bdms, recreate, on_shared_storage=None,
                     preserve_ephemeral=False, migration=None,
                     scheduled_node=None, limits=None):
    context = context.elevated()
    LOG.info(_LI("Rebuilding instance"), instance=instance)
    # recreate is the evacuate case; it is False for a plain rebuild,
    # so that branch can be ignored here
    if recreate:
        rt = self._get_resource_tracker()
        rebuild_claim = rt.rebuild_claim
    else:
        rebuild_claim = claims.NopClaim
    image_meta = {}
    if image_ref:
        image_meta = self.image_api.get(context, image_ref)
    # Determine the target node; with no scheduled node, keep the
    # instance's current one, so the host does not change
    if not scheduled_node:
        if recreate:
            try:
                compute_node = self._get_compute_info(context, self.host)
                scheduled_node = compute_node.hypervisor_hostname
            except exception.ComputeHostNotFound:
                LOG.exception(_LE('Failed to get compute_info for %s'),
                              self.host)
        else:
            scheduled_node = instance.node
    with self._error_out_instance_on_exception(context, instance):
        try:
            # Rebuild under a resource claim
            claim_ctxt = rebuild_claim(
                context, instance, scheduled_node,
                limits=limits, image_meta=image_meta,
                migration=migration)
            self._do_rebuild_instance_with_claim(
                claim_ctxt, context, instance, orig_image_ref,
                image_ref, injected_files, new_pass, orig_sys_metadata,
                bdms, recreate, on_shared_storage, preserve_ephemeral,
                migration)
        except exception.ComputeResourcesUnavailable as e:
            .....
        else:
            instance.apply_migration_context()
            # NOTE (ndipanov): This save will now update the host and node
            # attributes making sure that next RT pass is consistent since
            # it will be based on the instance and not the migration DB
            # entry.
            instance.host = self.host
            instance.node = scheduled_node
            instance.save()
            instance.drop_migration_context()
            # NOTE (ndipanov): Mark the migration as done only after we
            # mark the instance as belonging to this host.
            self._set_migration_status(migration, 'done')
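_error_out_instance_on_exception wraps the whole rebuild so that a failure is visible to operators. A rough sketch of the pattern — not the actual Nova helper, which also maps specific exceptions:

import contextlib

@contextlib.contextmanager
def error_out_instance_on_exception(context, instance):
    try:
        yield
    except Exception:
        # Flip the instance to ERROR before re-raising, so a failed
        # rebuild is not left looking half-done.
        instance.vm_state = 'error'  # vm_states.ERROR in Nova
        instance.save()
        raise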
_do_rebuild_instance_with_claim
Definition:
def _do_rebuild_instance_with_claim(self, claim_context, *args, **kwargs):
    """Helper to avoid deep nesting in the top-level method."""
    with claim_context:
        self._do_rebuild_instance(*args, **kwargs)
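The claim is a context manager, which is why claims.NopClaim exists: it makes the plain-rebuild case (no resource accounting needed) uniform with the evacuate case. A rough sketch of the shape, not the actual Nova class:

class NopClaim:
    # A no-op claim: accepts whatever a real claim accepts and
    # satisfies the context-manager protocol without claiming anything.
    def __init__(self, *args, **kwargs):
        pass
    def __enter__(self):
        return self
    def __exit__(self, exc_type, exc, tb):
        # A real rebuild claim would release its claimed resources
        # here if the rebuild failed.
        return False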
def _do_rebuild_instance(self, context, instance, orig_image_ref,
                         image_ref, injected_files, new_pass,
                         orig_sys_metadata, bdms, recreate,
                         on_shared_storage, preserve_ephemeral,
                         migration):
    orig_vm_state = instance.vm_state
    # recreate (evacuate) branch; it can be ignored for a plain rebuild
    if recreate:
        ....
    if image_ref:
        image_meta = objects.ImageMeta.from_image_ref(
            context, self.image_api, image_ref)
    else:
        image_meta = instance.image_meta
    # Record usage against the original image
    orig_image_ref_url = glance.generate_image_url(orig_image_ref)
    extra_usage_info = {'image_ref_url': orig_image_ref_url}
    compute_utils.notify_usage_exists(
        self.notifier, context, instance,
        current_period=True, system_metadata=orig_sys_metadata,
        extra_usage_info=extra_usage_info)
    # This message should contain the new image_ref
    extra_usage_info = {'image_name': self._get_image_name(image_meta)}
    self._notify_about_instance_usage(context, instance,
            "rebuild.start", extra_usage_info=extra_usage_info)
    # Update the instance state
    instance.power_state = self._get_power_state(context, instance)
    instance.task_state = task_states.REBUILDING
    instance.save(expected_task_state=[task_states.REBUILDING])
    if recreate:
        ...
    else:
        network_info = compute_utils.get_nw_info_for_instance(instance)
    if bdms is None:
        bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
            context, instance.uuid)
    # Fetch the instance's block device info
    block_device_info = \
        self._get_instance_block_device_info(
            context, instance, bdms=bdms)
    def detach_block_devices(context, bdms):
        for bdm in bdms:
            if bdm.is_volume:
                self._detach_volume(context, bdm.volume_id, instance,
                                    destroy_bdm=False)

    files = self._decode_files(injected_files)
    kwargs = dict(
        context=context,
        instance=instance,
        image_meta=image_meta,
        injected_files=files,
        admin_password=new_pass,
        bdms=bdms,
        detach_block_devices=detach_block_devices,
        attach_block_devices=self._prep_block_device,
        block_device_info=block_device_info,
        network_info=network_info,
        preserve_ephemeral=preserve_ephemeral,
        recreate=recreate)
    try:
        # Ask the driver to rebuild
        with instance.mutated_migration_context():
            self.driver.rebuild(**kwargs)
    except NotImplementedError:
        # NOTE(rpodolyaka): driver doesn't provide specialized version
        # of rebuild, fall back to the default implementation
        # (this is what actually runs for libvirt)
        self._rebuild_default_impl(**kwargs)
    # Update the instance state and emit events
    self._update_instance_after_spawn(context, instance)
    instance.save(expected_task_state=[task_states.REBUILD_SPAWNING])
    if orig_vm_state == vm_states.STOPPED:
        LOG.info(_LI("bringing vm to original state: '%s'"),
                 orig_vm_state, instance=instance)
        instance.vm_state = vm_states.ACTIVE
        instance.task_state = task_states.POWERING_OFF
        instance.progress = 0
        instance.save()
        self.stop_instance(context, instance, False)
    self._update_scheduler_instance_info(context, instance)
    self._notify_about_instance_usage(
        context, instance, "rebuild.end",
        network_info=network_info,
        extra_usage_info=extra_usage_info)
Note: the libvirt driver does not provide a rebuild method, so the
self.driver.rebuild(**kwargs) call raises NotImplementedError and
self._rebuild_default_impl(**kwargs) is what actually runs.
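A minimal sketch of this fallback pattern, with hypothetical names rather than Nova's classes:

class BaseDriver:
    def rebuild(self, **kwargs):
        # Drivers without a specialized rebuild simply do not
        # override this, and the caller falls back.
        raise NotImplementedError()

def rebuild_with_fallback(driver, default_impl, **kwargs):
    try:
        driver.rebuild(**kwargs)
    except NotImplementedError:
        default_impl(**kwargs)

The default implementation: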
def _rebuild_default_impl(self, context, instance, image_meta,
                          injected_files, admin_password, bdms,
                          detach_block_devices, attach_block_devices,
                          network_info=None,
                          recreate=False, block_device_info=None,
                          preserve_ephemeral=False):
    if preserve_ephemeral:
        # The default code path does not support preserving ephemeral
        # partitions.
        raise exception.PreserveEphemeralNotSupported()
    if recreate:
        detach_block_devices(context, bdms)
    else:
        # Power off the instance
        self._power_off_instance(context, instance, clean_shutdown=True)
        # Detach the volumes
        detach_block_devices(context, bdms)
        # Destroy the guest
        self.driver.destroy(context, instance,
                            network_info=network_info,
                            block_device_info=block_device_info)
    # Update the instance's task state
    instance.task_state = task_states.REBUILD_BLOCK_DEVICE_MAPPING
    instance.save(expected_task_state=[task_states.REBUILDING])
    new_block_device_info = attach_block_devices(context, instance, bdms)
    instance.task_state = task_states.REBUILD_SPAWNING
    instance.save(
        expected_task_state=[task_states.REBUILD_BLOCK_DEVICE_MAPPING])
    # Re-create the guest via spawn
    with instance.mutated_migration_context():
        self.driver.spawn(context, instance, image_meta, injected_files,
                          admin_password, network_info=network_info,
                          block_device_info=new_block_device_info)
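In short, the default implementation powers off the guest, detaches its volumes, destroys the domain, re-attaches the block devices, and then calls driver.spawn, walking task_state through REBUILDING → REBUILD_BLOCK_DEVICE_MAPPING → REBUILD_SPAWNING along the way.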
5. spawn: creating the guest
driver.spawn is the same code path used for normal instance creation.
The source is in nova/virt/libvirt/driver.py; the spawn path is defined as follows:
# NOTE(ilyaalekseyev): Implementation like in multinics
# for xenapi(tr3buchet)
def spawn(self, context, instance, image_meta, injected_files,
          admin_password, network_info=None, block_device_info=None):
    disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
                                        instance,
                                        image_meta,
                                        block_device_info)
    injection_info = InjectionInfo(network_info=network_info,
                                   files=injected_files,
                                   admin_pass=admin_password)
    gen_confdrive = functools.partial(self._create_configdrive,
                                      context, instance,
                                      injection_info)
    self._create_image(context, instance, disk_info['mapping'],
                       injection_info=injection_info,
                       block_device_info=block_device_info)
    # Required by Quobyte CI
    self._ensure_console_log_for_instance(instance)
    xml = self._get_guest_xml(context, instance, network_info,
                              disk_info, image_meta,
                              block_device_info=block_device_info)
    self._create_domain_and_network(
        context, xml, instance, network_info, disk_info,
        block_device_info=block_device_info,
        post_xml_callback=gen_confdrive,
        destroy_disks_on_failure=True)
    LOG.debug("Instance is running", instance=instance)

    def _wait_for_boot():
        """Called at an interval until the VM is running."""
        state = self.get_info(instance).state
        if state == power_state.RUNNING:
            LOG.info(_LI("Instance spawned successfully."),
                     instance=instance)
            raise loopingcall.LoopingCallDone()

    timer = loopingcall.FixedIntervalLoopingCall(_wait_for_boot)
    timer.start(interval=0.5).wait()
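The boot wait uses oslo.service's looping-call helper, which polls a function at a fixed interval until it raises LoopingCallDone. A self-contained sketch of the pattern; check_ready is a hypothetical readiness probe:

from oslo_service import loopingcall

def check_ready():
    # Hypothetical probe; in driver.spawn this is effectively
    # "get_info(instance).state == power_state.RUNNING".
    return True

def _poll():
    if check_ready():
        raise loopingcall.LoopingCallDone()

timer = loopingcall.FixedIntervalLoopingCall(_poll)
timer.start(interval=0.5).wait()  # blocks until LoopingCallDone is raised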
This part is identical to normal instance creation, so it is not analyzed further here.