
The Kubelet Pod Creation Workflow

The Kubelet is one of the core components of Kubernetes. It manages the entire lifecycle of a Pod and is the last link in the chain when Kubernetes creates a Pod. This article describes how the Kubelet creates a Pod.

Kubelet Architecture

Let's start with a diagram of the Kubelet's component architecture, shown below.

(Figure: Kubelet component architecture)

As the diagram shows, the Kubelet is organized into three layers: the API layer, the syncLoop layer, and the CRI layer and below. The API layer is easy to understand; it is the part that exposes interfaces to the outside. The syncLoop layer is the Kubelet's core working layer: most of the Kubelet's work revolves around this sync loop, a control loop driven by producers and consumers. The CRI layer provides the interfaces for the container and image services, which a container runtime implements and exposes as a CRI plugin.
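
To make the CRI layer more concrete, here is a small sketch of the kinds of calls the Kubelet makes against a container runtime. The RPC names mirror those defined by the CRI, but the signatures and the empty config types below are simplified placeholders, not the real generated gRPC interfaces from k8s.io/cri-api.

package main

// Empty placeholders standing in for the CRI protobuf config messages.
type PodSandboxConfig struct{}
type ContainerConfig struct{}

// runtimeService sketches a few of the sandbox/container calls the Kubelet
// issues to a CRI runtime. Signatures are simplified for illustration.
type runtimeService interface {
  // Sandbox lifecycle: one sandbox per pod.
  RunPodSandbox(config *PodSandboxConfig) (podSandboxID string, err error)
  StopPodSandbox(podSandboxID string) error
  RemovePodSandbox(podSandboxID string) error

  // Container lifecycle inside a sandbox.
  CreateContainer(podSandboxID string, config *ContainerConfig) (containerID string, err error)
  StartContainer(containerID string) error
  StopContainer(containerID string, timeout int64) error
  RemoveContainer(containerID string) error
}

// imageService sketches the image-side calls (pulling and removing images).
type imageService interface {
  PullImage(image string) (imageRef string, err error)
  RemoveImage(image string) error
}

func main() {}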

Let's take a look at some of the important components in the syncLoop layer.

  • PLEG: calls the container runtime interface to fetch the containers/sandboxes on this node, compares them against the pod cache maintained locally, generates the corresponding PodLifecycleEvents, and sends them to the Kubelet syncLoop through eventChannel; periodic sync tasks then drive the node toward the user's desired state.
  • cAdvisor: the container monitoring tool built into the Kubelet, used to collect monitoring data about this node and its containers.
  • PodWorkers: registers multiple pod handlers that process pods at the appropriate time, including creating, updating, and deleting them.
  • oomWatcher: a listener for system OOM events; it establishes a SystemOOM watch against the cAdvisor module and receives OOM-related events from cAdvisor via Watch.
  • containerGC: responsible for cleaning up useless containers on the node; the concrete garbage collection is performed by the container runtime.
  • imageGC: responsible for image garbage collection on the node. When the local disk space used to store images reaches a certain threshold, image collection is triggered and images not used by any pod are deleted.
  • Managers: the various managers that look after pod-related resources. Each manager has its own role, and they work together within the syncLoop.
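
As a rough illustration of the PLEG idea described above, the sketch below shows a relisting loop that compares the runtime's view of containers against a locally cached view and emits lifecycle events on a channel for the sync loop to consume. All of the type and function names here are simplified stand-ins, not the real pkg/kubelet/pleg implementation.

package main

import (
  "fmt"
  "time"
)

// Simplified stand-ins for the real PLEG event types.
type PodLifecycleEventType string

const (
  ContainerStarted PodLifecycleEventType = "ContainerStarted"
  ContainerDied    PodLifecycleEventType = "ContainerDied"
)

type PodLifecycleEvent struct {
  PodUID string
  Type   PodLifecycleEventType
  Data   string // e.g. the container ID
}

// listRunning is a placeholder for querying the container runtime (CRI) for
// the containers currently running on the node.
func listRunning() map[string]bool { return map[string]bool{"container-1": true} }

// relist compares the runtime state with the cached state and emits events,
// mirroring the compare-and-generate step performed by PLEG.
func relist(cache map[string]bool, events chan<- PodLifecycleEvent) map[string]bool {
  current := listRunning()
  for id := range current {
    if !cache[id] {
      events <- PodLifecycleEvent{PodUID: "pod-1", Type: ContainerStarted, Data: id}
    }
  }
  for id := range cache {
    if !current[id] {
      events <- PodLifecycleEvent{PodUID: "pod-1", Type: ContainerDied, Data: id}
    }
  }
  return current
}

func main() {
  events := make(chan PodLifecycleEvent, 16)
  go func() {
    cache := map[string]bool{}
    for range time.Tick(time.Second) { // periodic relisting
      cache = relist(cache, events)
    }
  }()
  fmt.Println(<-events) // the sync loop would consume these events
}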

How the Kubelet Works

As mentioned above, the Kubelet works mainly around the sync loop. With the help of Go channels, each component either watches the loop to consume events or produces pod-related events into it, so the whole control loop runs event-driven. This can be represented by the diagram below.

(Figure: the Kubelet's event-driven sync loop)

For example, in the pod creation flow, when a pod is scheduled onto a node, the handler that the Kubelet registered in the control loop is triggered, such as the HandlePods part in the diagram above. The Kubelet then checks the pod's state in its own memory, determines that this is a pod that needs to be created, and runs the logic registered for the ADD event in the handler.

The Sync Loop

Let's look at the main loop, syncLoop.

func (kl *Kubelet) syncLoop(updates <-chan kubetypes.PodUpdate, handler SyncHandler) {
  klog.Info("Starting kubelet main sync loop.")
  // The syncTicker wakes up Kubelet to checks if there are any pod workers
  // that need to be sync'd. A one-second period is sufficient because the
  // sync interval is defaulted to 10s.
  syncTicker := time.NewTicker(time.Second)
  defer syncTicker.Stop()
  housekeepingTicker := time.NewTicker(housekeepingPeriod)
  defer housekeepingTicker.Stop()
  plegCh := kl.pleg.Watch()
  const (
    base   = 100 * time.Millisecond
    max    = 5 * time.Second
    factor = 2
  )
  duration := base
  // Responsible for checking limits in resolv.conf
  // The limits do not have anything to do with individual pods
  // Since this is called in syncLoop, we don't need to call it anywhere else
  if kl.dnsConfigurer != nil && kl.dnsConfigurer.ResolverConfig != "" {
    kl.dnsConfigurer.CheckLimitsForResolvConf()
  }

  for {
    if err := kl.runtimeState.runtimeErrors(); err != nil {
      klog.Errorf("skipping pod synchronization - %v", err)
      // exponential backoff
      time.Sleep(duration)
      duration = time.Duration(math.Min(float64(max), factor*float64(duration)))
      continue
    }
    // reset backoff if we have a success
    duration = base

    kl.syncLoopMonitor.Store(kl.clock.Now())
    if !kl.syncLoopIteration(updates, handler, syncTicker.C, housekeepingTicker.C, plegCh) {
      break
    }
    kl.syncLoopMonitor.Store(kl.clock.Now())
  }
}

syncLoop starts an infinite loop whose body simply calls the syncLoopIteration method. syncLoopIteration reads from all of the incoming channels and hands any channel that has a message over to the corresponding handler.

These channels include:

  • configCh: the producer for this channel is the PodConfig submodule in the kubeDeps object. That module watches for changes to pod information coming from files, HTTP, and the apiserver, and produces an event on this channel whenever pod information from a source is updated.
  • plegCh: the producer for this channel is the PLEG submodule, which periodically queries the container runtime for the current state of all containers and produces an event on this channel when the state changes.
  • syncCh: periodically synchronizes the latest saved pod state.
  • livenessManager.Updates(): when a health check finds a pod unhealthy, the Kubelet automatically takes the appropriate action based on the pod's restartPolicy.
  • housekeepingCh: the channel for housekeeping events, used for pod cleanup.

The code for syncLoopIteration:

func (kl *Kubelet) syncLoopIteration(configCh <-chan kubetypes.PodUpdate, handler SyncHandler,
  syncCh <-chan time.Time, housekeepingCh <-chan time.Time, plegCh <-chan *pleg.PodLifecycleEvent) bool {
  select {
  case u, open := <-configCh:
    // Update from a config source; dispatch it to the right handler callback.
    if !open {
      klog.Errorf("Update channel is closed. Exiting the sync loop.")
      return false
    }

    switch u.Op {
    case kubetypes.ADD:
      klog.V(2).Infof("SyncLoop (ADD, %q): %q", u.Source, format.Pods(u.Pods))
      // After restarting, Kubelet will get all existing pods through ADD as if they are
      // new pods. These pods will then go through the admission process and *may* be
      // rejected. This can be resolved once we have checkpointing.
      handler.HandlePodAdditions(u.Pods)
    case kubetypes.UPDATE:
      klog.V(2).Infof("SyncLoop (UPDATE, %q): %q", u.Source, format.PodsWithDeletionTimestamps(u.Pods))
      handler.HandlePodUpdates(u.Pods)
    case kubetypes.REMOVE:
      klog.V(2).Infof("SyncLoop (REMOVE, %q): %q", u.Source, format.Pods(u.Pods))
      handler.HandlePodRemoves(u.Pods)
    case kubetypes.RECONCILE:
      klog.V(4).Infof("SyncLoop (RECONCILE, %q): %q", u.Source, format.Pods(u.Pods))
      handler.HandlePodReconcile(u.Pods)
    case kubetypes.DELETE:
      klog.V(2).Infof("SyncLoop (DELETE, %q): %q", u.Source, format.Pods(u.Pods))
      // DELETE is treated as a UPDATE because of graceful deletion.
      handler.HandlePodUpdates(u.Pods)
    case kubetypes.SET:
      // TODO: Do we want to support this?
      klog.Errorf("Kubelet does not support snapshot update")
    default:
      klog.Errorf("Invalid event type received: %d.", u.Op)
    }

    kl.sourcesReady.AddSource(u.Source)
  case e := <-plegCh:
    if e.Type == pleg.ContainerStarted {
      // record the most recent time we observed a container start for this pod.
      // this lets us selectively invalidate the runtimeCache when processing a delete for this pod
      // to make sure we don't miss handling graceful termination for containers we reported as having started.
      kl.lastContainerStartedTime.Add(e.ID, time.Now())
    }
    if isSyncPodWorthy(e) {
      // PLEG event for a pod; sync it.
      if pod, ok := kl.podManager.GetPodByUID(e.ID); ok {
        klog.V(2).Infof("SyncLoop (PLEG): %q, event: %#v", format.Pod(pod), e)
        handler.HandlePodSyncs([]*v1.Pod{pod})
      } else {
        // If the pod no longer exists, ignore the event.
        klog.V(4).Infof("SyncLoop (PLEG): ignore irrelevant event: %#v", e)
      }
    }

    if e.Type == pleg.ContainerDied {
      if containerID, ok := e.Data.(string); ok {
        kl.cleanUpContainersInPod(e.ID, containerID)
      }
    }
  case <-syncCh:
    // Sync pods waiting for sync
    podsToSync := kl.getPodsToSync()
    if len(podsToSync) == 0 {
      break
    }
    klog.V(4).Infof("SyncLoop (SYNC): %d pods; %s", len(podsToSync), format.Pods(podsToSync))
    handler.HandlePodSyncs(podsToSync)
  case update := <-kl.livenessManager.Updates():
    if update.Result == proberesults.Failure {
      // The liveness manager detected a failure; sync the pod.

      // We should not use the pod from livenessManager, because it is never updated after
      // initialization.
      pod, ok := kl.podManager.GetPodByUID(update.PodUID)
      if !ok {
        // If the pod no longer exists, ignore the update.
        klog.V(4).Infof("SyncLoop (container unhealthy): ignore irrelevant update: %#v", update)
        break
      }
      klog.V(1).Infof("SyncLoop (container unhealthy): %q", format.Pod(pod))
      handler.HandlePodSyncs([]*v1.Pod{pod})
    }
  case <-housekeepingCh:
    if !kl.sourcesReady.AllReady() {
      // If the sources aren't ready or volume manager has not yet synced the states,
      // skip housekeeping, as we may accidentally delete pods from unready sources.
      klog.V(4).Infof("SyncLoop (housekeeping, skipped): sources aren't ready yet.")
    } else {
      klog.V(4).Infof("SyncLoop (housekeeping)")
      if err := handler.HandlePodCleanups(); err != nil {
        klog.Errorf("Failed cleaning pods: %v", err)
      }
    }
  }
  return true
}

The Pod Creation Process

Pod creation in the Kubelet is triggered by an ADD event on configCh, so what follows is the main flow after the Kubelet receives an ADD event.

The Handler

When an ADD event arrives on configCh, the loop triggers the SyncHandler's HandlePodAdditions method. The method's flow can be described by the flowchart below.

(Figure: HandlePodAdditions flow)

First, the handler sorts all of the pods by creation time and then processes them one by one.

It first adds the pod to the podManager so that later steps can use it; it then checks whether the pod is a mirror pod, and if so handles it as a mirror pod rather than as a normal pod. Here is what a mirror pod is:

A mirror pod is the copy in the apiserver of a static pod created by the kubelet. Because static pods are managed directly by the Kubelet, the apiserver does not know they exist, and their lifecycle is handled entirely by the Kubelet. So that the corresponding pod can still be viewed with kubectl, and the static pod's logs read directly with kubectl logs, the Kubelet creates a mirror pod in the apiserver for each static pod.
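
As a minimal sketch of how these pod kinds can be told apart, static and mirror pods are marked with config annotations on the pod object. The annotation keys below are the ones the kubelet conventionally uses, while the helper functions and the example pod are simplified stand-ins for the real kubetypes helpers.

package main

import (
  "fmt"

  v1 "k8s.io/api/core/v1"
  metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Annotation keys the kubelet uses to record where a pod's config came from.
const (
  configSourceAnnotation = "kubernetes.io/config.source" // "file", "http", or "api"
  configMirrorAnnotation = "kubernetes.io/config.mirror" // present only on mirror pods
)

// isMirrorPod is a simplified stand-in for kubetypes.IsMirrorPod: a mirror pod
// carries the mirror annotation that the kubelet sets on the apiserver copy.
func isMirrorPod(pod *v1.Pod) bool {
  _, ok := pod.Annotations[configMirrorAnnotation]
  return ok
}

// isStaticPod is a simplified stand-in for kubetypes.IsStaticPod: a static pod
// comes from a non-apiserver source such as a manifest file or an HTTP URL.
func isStaticPod(pod *v1.Pod) bool {
  src := pod.Annotations[configSourceAnnotation]
  return src != "" && src != "api"
}

func main() {
  pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{
    Name:        "etcd-node1",
    Annotations: map[string]string{configSourceAnnotation: "file"},
  }}
  fmt.Println(isStaticPod(pod), isMirrorPod(pod)) // true false
}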

The next step is to decide whether the pod can run on this node, which in the Kubelet is also called pod admission control. The admission checks mainly cover the following:

  1. whether the node satisfies the pod's affinity rules
  2. whether the node has enough resources to allocate to the pod
  3. whether the pod uses HostNetwork or HostIPC, and if so, whether it is on the node's whitelist
  4. whether the /proc mount directory satisfies the requirements
  5. whether AppArmor is configured for the pod, and configured correctly

When all of these conditions are satisfied, the podWorker is finally triggered to sync the pod.
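
To make the idea of chained admission checks concrete, here is a minimal sketch in the spirit of the Kubelet's admit handlers; the interface and the single example handler are simplified stand-ins, not the real pkg/kubelet/lifecycle types.

package main

import "fmt"

// podAdmitResult is a simplified stand-in for the result an admit handler returns.
type podAdmitResult struct {
  Admit   bool
  Reason  string
  Message string
}

// podAdmitHandler is a simplified stand-in for the Kubelet's admit handler idea.
// Real handlers check things like node affinity, resource fit, host namespaces,
// the /proc mount, and AppArmor, as listed above.
type podAdmitHandler interface {
  Admit(podName string) podAdmitResult
}

// resourceFitHandler rejects pods when the node has no CPU left to allocate.
type resourceFitHandler struct{ freeMilliCPU int64 }

func (h resourceFitHandler) Admit(podName string) podAdmitResult {
  if h.freeMilliCPU <= 0 {
    return podAdmitResult{Admit: false, Reason: "OutOfcpu", Message: "no CPU left on node"}
  }
  return podAdmitResult{Admit: true}
}

// canAdmitPod runs every registered handler; the first rejection wins, which
// mirrors the "reject and continue to the next pod" behavior in HandlePodAdditions.
func canAdmitPod(handlers []podAdmitHandler, podName string) (bool, string, string) {
  for _, h := range handlers {
    if r := h.Admit(podName); !r.Admit {
      return false, r.Reason, r.Message
    }
  }
  return true, "", ""
}

func main() {
  handlers := []podAdmitHandler{resourceFitHandler{freeMilliCPU: 0}}
  ok, reason, message := canAdmitPod(handlers, "nginx")
  fmt.Println(ok, reason, message) // false OutOfcpu no CPU left on node
}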

The code for HandlePodAdditions is as follows.

func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) {
  start := kl.clock.Now()
  sort.Sort(sliceutils.PodsByCreationTime(pods))
  for _, pod := range pods {
    existingPods := kl.podManager.GetPods()
    // Always add the pod to the pod manager. Kubelet relies on the pod
    // manager as the source of truth for the desired state. If a pod does
    // not exist in the pod manager, it means that it has been deleted in
    // the apiserver and no action (other than cleanup) is required.
    kl.podManager.AddPod(pod)

    if kubetypes.IsMirrorPod(pod) {
      kl.handleMirrorPod(pod, start)
      continue
    }

    if !kl.podIsTerminated(pod) {
      // Only go through the admission process if the pod is not
      // terminated.

      // We failed pods that we rejected, so activePods include all admitted
      // pods that are alive.
      activePods := kl.filterOutTerminatedPods(existingPods)

      // Check if we can admit the pod; if not, reject it.
      if ok, reason, message := kl.canAdmitPod(activePods, pod); !ok {
        kl.rejectPod(pod, reason, message)
        continue
      }
    }
    mirrorPod, _ := kl.podManager.GetMirrorPodByPod(pod)
    kl.dispatchWork(pod, kubetypes.SyncPodCreate, mirrorPod, start)
    kl.probeManager.AddPod(pod)
  }
}

The Work of podWorkers

Next, let's look at what the podWorker does. The podWorker maintains a map called podUpdates, keyed by pod UID, with one channel per pod. When an event arrives for a pod, the podWorker first looks up the corresponding channel in this map and starts a goroutine that listens on the channel and runs managePodLoop; it then pushes the pod that needs to be synced into that channel.

When managePodLoop receives an event, it first fetches the pod's latest status from the pod cache to make sure the pod it is working on is up to date; it then calls the syncPod method, records the sync result in the workQueue, and waits for the next periodic sync task.

The whole process is shown in the diagram below.

(Figure: how podWorkers handle pod events)

The podWorkers code that handles pod events:

func (p *podWorkers) UpdatePod(options *UpdatePodOptions) {
  pod := options.Pod
  uid := pod.UID
  var podUpdates chan UpdatePodOptions
  var exists bool

  p.podLock.Lock()
  defer p.podLock.Unlock()
  if podUpdates, exists = p.podUpdates[uid]; !exists {
    podUpdates = make(chan UpdatePodOptions, 1)
    p.podUpdates[uid] = podUpdates

    go func() {
      defer runtime.HandleCrash()
      p.managePodLoop(podUpdates)
    }()
  }
  if !p.isWorking[pod.UID] {
    p.isWorking[pod.UID] = true
    podUpdates <- *options
  } else {
    // if a request to kill a pod is pending, we do not let anything overwrite that request.
    update, found := p.lastUndeliveredWorkUpdate[pod.UID]
    if !found || update.UpdateType != kubetypes.SyncPodKill {
      p.lastUndeliveredWorkUpdate[pod.UID] = *options
    }
  }
}

func (p *podWorkers) managePodLoop(podUpdates <-chan UpdatePodOptions) {
  var lastSyncTime time.Time
  for update := range podUpdates {
    err := func() error {
      podUID := update.Pod.UID
      status, err := p.podCache.GetNewerThan(podUID, lastSyncTime)
      if err != nil {
        p.recorder.Eventf(update.Pod, v1.EventTypeWarning, events.FailedSync, "error determining status: %v", err)
        return err
      }
      err = p.syncPodFn(syncPodOptions{
        mirrorPod:      update.MirrorPod,
        pod:            update.Pod,
        podStatus:      status,
        killPodOptions: update.KillPodOptions,
        updateType:     update.UpdateType,
      })
      lastSyncTime = time.Now()
      return err
    }()
    // notify the call-back function if the operation succeeded or not
    if update.OnCompleteFunc != nil {
      update.OnCompleteFunc(err)
    }
    if err != nil {
      // IMPORTANT: we do not log errors here, the syncPodFn is responsible for logging errors
      klog.Errorf("Error syncing pod %s (%q), skipping: %v", update.Pod.UID, format.Pod(update.Pod), err)
    }
    p.wrapUp(update.Pod.UID, err)
  }
}

Syncing the Pod

The syncPod method that the podWorker calls in managePodLoop above is actually the Kubelet object's syncPod method, in the file pkg/kubelet/kubelet.go.

This is the method that actually interacts with the container runtime layer. It first checks whether this is a kill event, and if so calls the runtime's killPod directly. It then checks whether the pod can run on this node, which is the Kubelet admission control mentioned above. Next it checks whether the network (CNI) plugin is ready; if not, only pods that use the host network may proceed. It then checks whether the pod is a static pod, and if so creates the corresponding mirror pod; after that it creates the data directories the pod needs; and finally it calls the runtime's SyncPod.

The whole process is shown in the diagram below.

(Figure: the Kubelet syncPod flow)

The Kubelet's syncPod code is shown below. To highlight the main flow, I have removed some of the optimization code; if you are interested, you can read the full source yourself.

func (kl *Kubelet) syncPod(o syncPodOptions) error {
  // pull out the required options
  pod := o.pod
  mirrorPod := o.mirrorPod
  podStatus := o.podStatus
  updateType := o.updateType

  // if we want to kill a pod, do it now!
  if updateType == kubetypes.SyncPodKill {
    ...
    if err := kl.killPod(pod, nil, podStatus, killPodOptions.PodTerminationGracePeriodSecondsOverride); err != nil {
      kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToKillPod, "error killing pod: %v", err)
      // there was an error killing the pod, so we return that error directly
      utilruntime.HandleError(err)
      return err
    }
    return nil
  }
  ...
  runnable := kl.canRunPod(pod)
  if !runnable.Admit {
    ...
  }
  ...
  // If the network plugin is not ready, only start the pod if it uses the host network
  if err := kl.runtimeState.networkErrors(); err != nil && !kubecontainer.IsHostNetworkPod(pod) {
    kl.recorder.Eventf(pod, v1.EventTypeWarning, events.NetworkNotReady, "%s: %v", NetworkNotReadyErrorMsg, err)
    return fmt.Errorf("%s: %v", NetworkNotReadyErrorMsg, err)
  }
  ...
  if !kl.podIsTerminated(pod) {
    ...
    if !(podKilled && pod.Spec.RestartPolicy == v1.RestartPolicyNever) {
      if !pcm.Exists(pod) {
        if err := kl.containerManager.UpdateQOSCgroups(); err != nil {
          klog.V(2).Infof("Failed to update QoS cgroups while syncing pod: %v", err)
        }
        ...
      }
    }
  }

  // Create Mirror Pod for Static Pod if it doesn't already exist
  if kubetypes.IsStaticPod(pod) {
    ...
    if mirrorPod == nil || deleted {
      node, err := kl.GetNode()
      if err != nil || node.DeletionTimestamp != nil {
        klog.V(4).Infof("No need to create a mirror pod, since node %q has been removed from the cluster", kl.nodeName)
      } else {
        klog.V(4).Infof("Creating a mirror pod for static pod %q", format.Pod(pod))
        if err := kl.podManager.CreateMirrorPod(pod); err != nil {
          klog.Errorf("Failed creating a mirror pod for %q: %v", format.Pod(pod), err)
        }
      }
    }
  }

  // Make data directories for the pod
  if err := kl.makePodDataDirs(pod); err != nil {
    kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToMakePodDataDirectories, "error making pod data directories: %v", err)
    klog.Errorf("Unable to make pod data directories for pod %q: %v", format.Pod(pod), err)
    return err
  }

  // Volume manager will not mount volumes for terminated pods
  if !kl.podIsTerminated(pod) {
    // Wait for volumes to attach/mount
    if err := kl.volumeManager.WaitForAttachAndMount(pod); err != nil {
      kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedMountVolume, "Unable to attach or mount volumes: %v", err)
      klog.Errorf("Unable to attach or mount volumes for pod %q: %v; skipping pod", format.Pod(pod), err)
      return err
    }
  }

  // Fetch the pull secrets for the pod
  pullSecrets := kl.getPullSecretsForPod(pod)

  // Call the container runtime's SyncPod callback
  result := kl.containerRuntime.SyncPod(pod, podStatus, pullSecrets, kl.backOff)
  kl.reasonCache.Update(pod.UID, result)
  if err := result.Error(); err != nil {
    // Do not return error if the only failures were pods in backoff
    for _, r := range result.SyncResults {
      if r.Error != kubecontainer.ErrCrashLoopBackOff && r.Error != images.ErrImagePullBackOff {
        // Do not record an event here, as we keep all event logging for sync pod failures
        // local to container runtime so we get better errors
        return err
      }
    }
    return nil
  }

  return nil
}

At this point the pod creation process has reached the runtime layer's SyncPod; let's look at that flow.

(Figure: the container runtime SyncPod flow)

The flow is clear: first compute the pod's sandbox and container changes; if the sandbox has changed, kill the pod and then kill its related containers; then create a sandbox for the pod (whether it is a pod being created for the first time or one whose sandbox changed and was deleted); and after that start the ephemeral containers, the init containers, and the regular business containers.

Ephemeral containers are a new feature in Kubernetes v1.16. They run temporarily inside an existing pod to carry out user-initiated operations such as troubleshooting.
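
As a hedged illustration, this is roughly what an ephemeral container looks like when expressed with the core v1 Go types; the container name, image, and target container below are made-up example values rather than anything taken from the Kubelet code in this article.

package main

import (
  "fmt"

  v1 "k8s.io/api/core/v1"
)

func main() {
  // A debugging container to be added to an existing pod via the pod's
  // ephemeralcontainers subresource.
  debug := v1.EphemeralContainer{
    EphemeralContainerCommon: v1.EphemeralContainerCommon{
      Name:    "debugger",
      Image:   "busybox",
      Command: []string{"sh"},
      Stdin:   true,
      TTY:     true,
    },
    // The existing container in the pod that the debugger should target.
    TargetContainerName: "app",
  }
  fmt.Printf("%+v\n", debug)
}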

The full code is below; again, some optimization code has been removed to show the main flow.

func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) {
  // Step 1: Compute sandbox and container changes.
  podContainerChanges := m.computePodActions(pod, podStatus)
  klog.V(3).Infof("computePodActions got %+v for pod %q", podContainerChanges, format.Pod(pod))
  if podContainerChanges.CreateSandbox {
    ref, err := ref.GetReference(legacyscheme.Scheme, pod)
    if err != nil {
      klog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
    }
    ...
  }

  // Step 2: Kill the pod if the sandbox has changed.
  if podContainerChanges.KillPod {
    killResult := m.killPodWithSyncResult(pod, kubecontainer.ConvertPodStatusToRunningPod(m.runtimeName, podStatus), nil)
    result.AddPodSyncResult(killResult)
    ...
  } else {
    // Step 3: kill any running containers in this pod which are not to keep.
    for containerID, containerInfo := range podContainerChanges.ContainersToKill {
      ...
      if err := m.killContainer(pod, containerID, containerInfo.name, containerInfo.message, nil); err != nil {
        killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
        klog.Errorf("killContainer %q(id=%q) for pod %q failed: %v", containerInfo.name, containerID, format.Pod(pod), err)
        return
      }
    }
  }
  ...
  // Step 4: Create a sandbox for the pod if necessary.
  podSandboxID := podContainerChanges.SandboxID
  if podContainerChanges.CreateSandbox {
    var msg string
    var err error
    ...
    podSandboxID, msg, err = m.createPodSandbox(pod, podContainerChanges.Attempt)
    if err != nil {
      ...
    }
    klog.V(4).Infof("Created PodSandbox %q for pod %q", podSandboxID, format.Pod(pod))
    ...
  }

  ...

  // Step 5: start ephemeral containers
  if utilfeature.DefaultFeatureGate.Enabled(features.EphemeralContainers) {
    for _, idx := range podContainerChanges.EphemeralContainersToStart {
      start("ephemeral container", ephemeralContainerStartSpec(&pod.Spec.EphemeralContainers[idx]))
    }
  }

  // Step 6: start the init container.
  if container := podContainerChanges.NextInitContainerToStart; container != nil {
    // Start the next init container.
    if err := start("init container", containerStartSpec(container)); err != nil {
      return
    }
    ...
  }

  // Step 7: start containers in podContainerChanges.ContainersToStart.
  for _, idx := range podContainerChanges.ContainersToStart {
    start("container", containerStartSpec(&pod.Spec.Containers[idx]))
  }

  return
}

Finally, let's look at what a sandbox is. In the field of computer security, a sandbox is an isolation mechanism used to restrict the privileges of untrusted processes. Docker uses this technique for containers, creating a sandbox for each container and defining its cgroup and the various namespaces that isolate it. In Kubernetes, each pod has one sandbox that is shared by all the containers in that pod, so containers in the same pod can interoperate with each other while remaining isolated from the outside world.

Let's look at how the Kubelet creates the sandbox for a pod. It first defines the pod's DNS configuration, hostname, log path, and sandbox port mappings, all of which are shared by the containers in the pod. It then defines the pod's Linux configuration, including the parent cgroup, the IPC/network/PID namespace options, sysctls, and the Linux security context. Once everything is configured, the overall flow is as follows.

(Figure: pod sandbox creation flow)

The source code:

func (m *kubeGenericRuntimeManager) createPodSandbox(pod *v1.Pod, attempt uint32) (string, string, error) {
  podSandboxConfig, err := m.generatePodSandboxConfig(pod, attempt)
  ...

  // Create pod logs directory
  err = m.osInterface.MkdirAll(podSandboxConfig.LogDirectory, 0755)
  ...
  podSandBoxID, err := m.runtimeService.RunPodSandbox(podSandboxConfig, runtimeHandler)
  ...
  return podSandBoxID, "", nil
}

func (m *kubeGenericRuntimeManager) generatePodSandboxConfig(pod *v1.Pod, attempt uint32) (*runtimeapi.PodSandboxConfig, error) {
  podUID := string(pod.UID)
  podSandboxConfig := &runtimeapi.PodSandboxConfig{
    Metadata: &runtimeapi.PodSandboxMetadata{
      Name:      pod.Name,
      Namespace: pod.Namespace,
      Uid:       podUID,
      Attempt:   attempt,
    },
    Labels:      newPodLabels(pod),
    Annotations: newPodAnnotations(pod),
  }

  dnsConfig, err := m.runtimeHelper.GetPodDNS(pod)
  ...
  podSandboxConfig.DnsConfig = dnsConfig

  if !kubecontainer.IsHostNetworkPod(pod) {
    podHostname, podDomain, err := m.runtimeHelper.GeneratePodHostNameAndDomain(pod)
    podHostname, err = util.GetNodenameForKernel(podHostname, podDomain, pod.Spec.SetHostnameAsFQDN)
    podSandboxConfig.Hostname = podHostname
  }

  logDir := BuildPodLogsDirectory(pod.Namespace, pod.Name, pod.UID)
  podSandboxConfig.LogDirectory = logDir

  portMappings := []*runtimeapi.PortMapping{}
  for _, c := range pod.Spec.Containers {
    containerPortMappings := kubecontainer.MakePortMappings(&c)
    ...
  }
  if len(portMappings) > 0 {
    podSandboxConfig.PortMappings = portMappings
  }

  lc, err := m.generatePodSandboxLinuxConfig(pod)
  ...
  podSandboxConfig.Linux = lc

  return podSandboxConfig, nil
}

// generatePodSandboxLinuxConfig generates LinuxPodSandboxConfig from v1.Pod.
func (m *kubeGenericRuntimeManager) generatePodSandboxLinuxConfig(pod *v1.Pod) (*runtimeapi.LinuxPodSandboxConfig, error) {
  cgroupParent := m.runtimeHelper.GetPodCgroupParent(pod)
  lc := &runtimeapi.LinuxPodSandboxConfig{
    CgroupParent: cgroupParent,
    SecurityContext: &runtimeapi.LinuxSandboxSecurityContext{
      Privileged:         kubecontainer.HasPrivilegedContainer(pod),
      SeccompProfilePath: v1.SeccompProfileRuntimeDefault,
    },
  }

  sysctls := make(map[string]string)
  if utilfeature.DefaultFeatureGate.Enabled(features.Sysctls) {
    if pod.Spec.SecurityContext != nil {
      for _, c := range pod.Spec.SecurityContext.Sysctls {
        sysctls[c.Name] = c.Value
      }
    }
  }

  lc.Sysctls = sysctls

  if pod.Spec.SecurityContext != nil {
    sc := pod.Spec.SecurityContext
    if sc.RunAsUser != nil {
      lc.SecurityContext.RunAsUser = &runtimeapi.Int64Value{Value: int64(*sc.RunAsUser)}
    }
    if sc.RunAsGroup != nil {
      lc.SecurityContext.RunAsGroup = &runtimeapi.Int64Value{Value: int64(*sc.RunAsGroup)}
    }
    lc.SecurityContext.NamespaceOptions = namespacesForPod(pod)

    if sc.FSGroup != nil {
      lc.SecurityContext.SupplementalGroups = append(lc.SecurityContext.SupplementalGroups, int64(*sc.FSGroup))
    }
    if groups := m.runtimeHelper.GetExtraSupplementalGroupsForPod(pod); len(groups) > 0 {
      lc.SecurityContext.SupplementalGroups = append(lc.SecurityContext.SupplementalGroups, groups...)
    }
    if sc.SupplementalGroups != nil {
      for _, sg := range sc.SupplementalGroups {
        lc.SecurityContext.SupplementalGroups = append(lc.SecurityContext.SupplementalGroups, int64(sg))
      }
    }
    if sc.SELinuxOptions != nil {
      lc.SecurityContext.SelinuxOptions = &runtimeapi.SELinuxOption{
        User:  sc.SELinuxOptions.User,
        Role:  sc.SELinuxOptions.Role,
        Type:  sc.SELinuxOptions.Type,
        Level: sc.SELinuxOptions.Level,
      }
    }
  }

  return lc, nil
}

Summary

To recap: a pod scheduled onto a node reaches the Kubelet as an ADD event on configCh. HandlePodAdditions registers it with the podManager, runs admission control, and dispatches it to a podWorker; the podWorker's managePodLoop calls the Kubelet's syncPod, which creates the mirror pod for static pods, prepares data directories and volumes, and then hands off to the container runtime's SyncPod, which creates the pod sandbox and starts the ephemeral, init, and regular containers in turn.
