K8s Core Resource Objects: Pod (Creation Flow)

Based on Kubernetes 1.25.

How a Pod Is Created

Containers are created by the kubelet. The core entry point is kubeGenericRuntimeManager.SyncPod, which drives the Pod's actual state toward the desired spec:


// SyncPod syncs the running pod into the desired pod by executing following steps:
//
// 1. Compute sandbox and container changes.
// 2. Kill pod sandbox if necessary.
// 3. Kill any containers that should not be running.
// 4. Create sandbox if necessary.
// 5. Create ephemeral containers.
// 6. Create init containers.
// 7. Create normal containers.
func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) {
	// Step 1: Compute sandbox and container changes.
	podContainerChanges := m.computePodActions(pod, podStatus)
	klog.V(3).InfoS("computePodActions got for pod", "podActions", podContainerChanges, "pod", klog.KObj(pod))
	if podContainerChanges.CreateSandbox {
		ref, err := ref.GetReference(legacyscheme.Scheme, pod)
		if err != nil {
			klog.ErrorS(err, "Couldn't make a ref to pod", "pod", klog.KObj(pod))
		}
		if podContainerChanges.SandboxID != "" {
			m.recorder.Eventf(ref, v1.EventTypeNormal, events.SandboxChanged, "Pod sandbox changed, it will be killed and re-created.")
		} else {
			klog.V(4).InfoS("SyncPod received new pod, will create a sandbox for it", "pod", klog.KObj(pod))
		}
	}

	// Step 2: Kill the pod if the sandbox has changed.
	if podContainerChanges.KillPod {
		if podContainerChanges.CreateSandbox {
			klog.V(4).InfoS("Stopping PodSandbox for pod, will start new one", "pod", klog.KObj(pod))
		} else {
			klog.V(4).InfoS("Stopping PodSandbox for pod, because all other containers are dead", "pod", klog.KObj(pod))
		}

		killResult := m.killPodWithSyncResult(pod, kubecontainer.ConvertPodStatusToRunningPod(m.runtimeName, podStatus), nil)
		result.AddPodSyncResult(killResult)
		if killResult.Error() != nil {
			klog.ErrorS(killResult.Error(), "killPodWithSyncResult failed")
			return
		}

		if podContainerChanges.CreateSandbox {
			m.purgeInitContainers(pod, podStatus)
		}
	} else {
		// Step 3: kill any running containers in this pod which are not to keep.
		for containerID, containerInfo := range podContainerChanges.ContainersToKill {
			klog.V(3).InfoS("Killing unwanted container for pod", "containerName", containerInfo.name, "containerID", containerID, "pod", klog.KObj(pod))
			killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, containerInfo.name)
			result.AddSyncResult(killContainerResult)
			if err := m.killContainer(pod, containerID, containerInfo.name, containerInfo.message, containerInfo.reason, nil); err != nil {
				killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
				klog.ErrorS(err, "killContainer for pod failed", "containerName", containerInfo.name, "containerID", containerID, "pod", klog.KObj(pod))
				return
			}
		}
	}

	// Keep terminated init containers fairly aggressively controlled
	// This is an optimization because container removals are typically handled
	// by container garbage collector.
	m.pruneInitContainersBeforeStart(pod, podStatus)

	// We pass the value of the PRIMARY podIP and list of podIPs down to
	// generatePodSandboxConfig and generateContainerConfig, which in turn
	// passes it to various other functions, in order to facilitate functionality
	// that requires this value (hosts file and downward API) and avoid races determining
	// the pod IP in cases where a container requires restart but the
	// podIP isn't in the status manager yet. The list of podIPs is used to
	// generate the hosts file.
	//
	// We default to the IPs in the passed-in pod status, and overwrite them if the
	// sandbox needs to be (re)started.
	var podIPs []string
	if podStatus != nil {
		podIPs = podStatus.IPs
	}

	// Step 4: Create a sandbox for the pod if necessary.
	podSandboxID := podContainerChanges.SandboxID
	if podContainerChanges.CreateSandbox {
		var msg string
		var err error

		klog.V(4).InfoS("Creating PodSandbox for pod", "pod", klog.KObj(pod))
		metrics.StartedPodsTotal.Inc()
		createSandboxResult := kubecontainer.NewSyncResult(kubecontainer.CreatePodSandbox, format.Pod(pod))
		result.AddSyncResult(createSandboxResult)

		// ConvertPodSysctlsVariableToDotsSeparator converts sysctl variable
		// in the Pod.Spec.SecurityContext.Sysctls slice into a dot as a separator.
		// runc uses the dot as the separator to verify whether the sysctl variable
		// is correct in a separate namespace, so when using the slash as the sysctl
		// variable separator, runc returns an error: "sysctl is not in a separate kernel namespace"
		// and the podSandBox cannot be successfully created. Therefore, before calling runc,
		// we need to convert the sysctl variable, the dot is used as a separator to separate the kernel namespace.
		// When runc supports slash as sysctl separator, this function can no longer be used.
		sysctl.ConvertPodSysctlsVariableToDotsSeparator(pod.Spec.SecurityContext)

		podSandboxID, msg, err = m.createPodSandbox(pod, podContainerChanges.Attempt)
		if err != nil {
			// createPodSandbox can return an error from CNI, CSI,
			// or CRI if the Pod has been deleted while the POD is
			// being created. If the pod has been deleted then it's
			// not a real error.
			//
			// SyncPod can still be running when we get here, which
			// means the PodWorker has not acked the deletion.
			if m.podStateProvider.IsPodTerminationRequested(pod.UID) {
				klog.V(4).InfoS("Pod was deleted and sandbox failed to be created", "pod", klog.KObj(pod), "podUID", pod.UID)
				return
			}
			metrics.StartedPodsErrorsTotal.Inc()
			createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg)
			klog.ErrorS(err, "CreatePodSandbox for pod failed", "pod", klog.KObj(pod))
			ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
			if referr != nil {
				klog.ErrorS(referr, "Couldn't make a ref to pod", "pod", klog.KObj(pod))
			}
			m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedCreatePodSandBox, "Failed to create pod sandbox: %v", err)
			return
		}
		klog.V(4).InfoS("Created PodSandbox for pod", "podSandboxID", podSandboxID, "pod", klog.KObj(pod))

		resp, err := m.runtimeService.PodSandboxStatus(podSandboxID, false)
		if err != nil {
			ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
			if referr != nil {
				klog.ErrorS(referr, "Couldn't make a ref to pod", "pod", klog.KObj(pod))
			}
			m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedStatusPodSandBox, "Unable to get pod sandbox status: %v", err)
			klog.ErrorS(err, "Failed to get pod sandbox status; Skipping pod", "pod", klog.KObj(pod))
			result.Fail(err)
			return
		}
		if resp.GetStatus() == nil {
			result.Fail(errors.New("pod sandbox status is nil"))
			return
		}

		// If we ever allow updating a pod from non-host-network to
		// host-network, we may use a stale IP.
		if !kubecontainer.IsHostNetworkPod(pod) {
			// Overwrite the podIPs passed in the pod status, since we just started the pod sandbox.
			podIPs = m.determinePodSandboxIPs(pod.Namespace, pod.Name, resp.GetStatus())
			klog.V(4).InfoS("Determined the ip for pod after sandbox changed", "IPs", podIPs, "pod", klog.KObj(pod))
		}
	}

	// the start containers routines depend on pod ip(as in primary pod ip)
	// instead of trying to figure out if we have 0 < len(podIPs)
	// everytime, we short circuit it here
	podIP := ""
	if len(podIPs) != 0 {
		podIP = podIPs[0]
	}

	// Get podSandboxConfig for containers to start.
	configPodSandboxResult := kubecontainer.NewSyncResult(kubecontainer.ConfigPodSandbox, podSandboxID)
	result.AddSyncResult(configPodSandboxResult)
	podSandboxConfig, err := m.generatePodSandboxConfig(pod, podContainerChanges.Attempt)
	if err != nil {
		message := fmt.Sprintf("GeneratePodSandboxConfig for pod %q failed: %v", format.Pod(pod), err)
		klog.ErrorS(err, "GeneratePodSandboxConfig for pod failed", "pod", klog.KObj(pod))
		configPodSandboxResult.Fail(kubecontainer.ErrConfigPodSandbox, message)
		return
	}

	// Helper containing boilerplate common to starting all types of containers.
	// typeName is a description used to describe this type of container in log messages,
	// currently: "container", "init container" or "ephemeral container"
	// metricLabel is the label used to describe this type of container in monitoring metrics.
	// currently: "container", "init_container" or "ephemeral_container"
	start := func(typeName, metricLabel string, spec *startSpec) error {
		startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, spec.container.Name)
		result.AddSyncResult(startContainerResult)

		isInBackOff, msg, err := m.doBackOff(pod, spec.container, podStatus, backOff)
		if isInBackOff {
			startContainerResult.Fail(err, msg)
			klog.V(4).InfoS("Backing Off restarting container in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod))
			return err
		}

		metrics.StartedContainersTotal.WithLabelValues(metricLabel).Inc()
		if sc.HasWindowsHostProcessRequest(pod, spec.container) {
			metrics.StartedHostProcessContainersTotal.WithLabelValues(metricLabel).Inc()
		}
		klog.V(4).InfoS("Creating container in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod))
		// NOTE (aramase) podIPs are populated for single stack and dual stack clusters. Send only podIPs.
		if msg, err := m.startContainer(podSandboxID, podSandboxConfig, spec, pod, podStatus, pullSecrets, podIP, podIPs); err != nil {
			// startContainer() returns well-defined error codes that have reasonable cardinality for metrics and are
			// useful to cluster administrators to distinguish "server errors" from "user errors".
			metrics.StartedContainersErrorsTotal.WithLabelValues(metricLabel, err.Error()).Inc()
			if sc.HasWindowsHostProcessRequest(pod, spec.container) {
				metrics.StartedHostProcessContainersErrorsTotal.WithLabelValues(metricLabel, err.Error()).Inc()
			}
			startContainerResult.Fail(err, msg)
			// known errors that are logged in other places are logged at higher levels here to avoid
			// repetitive log spam
			switch {
			case err == images.ErrImagePullBackOff:
				klog.V(3).InfoS("Container start failed in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod), "containerMessage", msg, "err", err)
			default:
				utilruntime.HandleError(fmt.Errorf("%v %+v start failed in pod %v: %v: %s", typeName, spec.container, format.Pod(pod), err, msg))
			}
			return err
		}

		return nil
	}

	// Step 5: start ephemeral containers
	// These are started "prior" to init containers to allow running ephemeral containers even when there
	// are errors starting an init container. In practice init containers will start first since ephemeral
	// containers cannot be specified on pod creation.
	for _, idx := range podContainerChanges.EphemeralContainersToStart {
		start("ephemeral container", metrics.EphemeralContainer, ephemeralContainerStartSpec(&pod.Spec.EphemeralContainers[idx]))
	}

	// Step 6: start the init container.
	if container := podContainerChanges.NextInitContainerToStart; container != nil {
		// Start the next init container.
		if err := start("init container", metrics.InitContainer, containerStartSpec(container)); err != nil {
			return
		}

		// Successfully started the container; clear the entry in the failure
		klog.V(4).InfoS("Completed init container for pod", "containerName", container.Name, "pod", klog.KObj(pod))
	}

	// Step 7: start containers in podContainerChanges.ContainersToStart.
	for _, idx := range podContainerChanges.ContainersToStart {
		start("container", metrics.Container, containerStartSpec(&pod.Spec.Containers[idx]))
	}

	return
}

The Pause Container and Its Creation Flow

The pause container, also called the infra container, exists to solve the networking problem inside a Pod: it holds the network namespace that all other containers in the Pod join.

The implementation of the pause container:

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#define STRINGIFY(x) #x
#define VERSION_STRING(x) STRINGIFY(x)

#ifndef VERSION
#define VERSION HEAD
#endif

static void sigdown(int signo) {
  psignal(signo, "Shutting down, got signal");
  exit(0);
}

static void sigreap(int signo) {
  while (waitpid(-1, NULL, WNOHANG) > 0)
    ;
}

int main(int argc, char **argv) {
  int i;
  for (i = 1; i < argc; ++i) {
    if (!strcasecmp(argv[i], "-v")) {
      printf("pause.c %s\n", VERSION_STRING(VERSION));
      return 0;
    }
  }

  if (getpid() != 1)
    /* Not an error because pause sees use outside of infra containers. */
    fprintf(stderr, "Warning: pause should be the first process\n");

  if (sigaction(SIGINT, &(struct sigaction){.sa_handler = sigdown}, NULL) < 0)
    return 1;
  if (sigaction(SIGTERM, &(struct sigaction){.sa_handler = sigdown}, NULL) < 0)
    return 2;
  if (sigaction(SIGCHLD, &(struct sigaction){.sa_handler = sigreap,
                                             .sa_flags = SA_NOCLDSTOP},
                NULL) < 0)
    return 3;

  for (;;)
    pause();
  fprintf(stderr, "Error: infinite loop terminated\n");
  return 42;
}

CRI and the Sandbox

The CRI roughly consists of three groups of interfaces: Sandbox, Container, and Image, covering the common container operations such as Create, Delete, and List (a sketch of this grouping follows the list below):

  • Sandbox: provides the running environment for containers, including the Pod's network
  • Container: container lifecycle operations
  • Image: image operations
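
As a reference point, here is a minimal Go sketch of that grouping. The method names mirror the real CRI proto (k8s.io/cri-api/pkg/apis/runtime/v1), but the signatures are simplified: the real API is gRPC-based, every method takes a context plus request/response messages, and the placeholder types below stand in for the protobuf messages.

package cri

// Placeholder types standing in for the real CRI protobuf messages.
type (
	PodSandboxConfig struct{}
	PodSandboxStatus struct{}
	ContainerConfig  struct{}
	ContainerStatus  struct{}
	ImageSpec        struct{}
	Image            struct{}
	AuthConfig       struct{}
)

// Sandbox: lifecycle of the isolated environment (the pause container's
// namespaces and cgroups) that all containers in a Pod join.
type SandboxService interface {
	RunPodSandbox(config *PodSandboxConfig, runtimeHandler string) (string, error)
	StopPodSandbox(podSandboxID string) error
	RemovePodSandbox(podSandboxID string) error
	PodSandboxStatus(podSandboxID string) (*PodSandboxStatus, error)
}

// Container: lifecycle operations for individual containers in a sandbox.
type ContainerService interface {
	CreateContainer(podSandboxID string, config *ContainerConfig, sandboxConfig *PodSandboxConfig) (string, error)
	StartContainer(containerID string) error
	StopContainer(containerID string, timeout int64) error
	RemoveContainer(containerID string) error
	ContainerStatus(containerID string) (*ContainerStatus, error)
}

// Image: image management.
type ImageService interface {
	PullImage(image *ImageSpec, auth *AuthConfig, podSandboxConfig *PodSandboxConfig) (string, error)
	ImageStatus(image *ImageSpec) (*Image, error)
	RemoveImage(image *ImageSpec) error
	ListImages(filter *ImageSpec) ([]*Image, error)
}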

The kubelet drives the CRI over gRPC:

  1. Create the PodSandbox environment
  2. Call the Image and Container interfaces to pull the image and create containers
  3. Once the sandbox is up, the kubelet creates containers inside the Pod; each new container's namespaces are joined to the sandbox's namespaces
  4. When the Pod is deleted, the kubelet terminates all of its containers and removes the sandbox

Every sandbox implementation uses cgroups for resource quotas and carves out an isolated running environment; what differs is how the isolation itself is implemented:

  • For Kata Containers, the PodSandbox is a virtual machine
  • For Docker, the PodSandbox is a set of Linux namespaces

Core Flow of createPodSandbox

In Kubernetes, the sandbox that createPodSandbox creates is backed by the pause container:

  1. generatePodSandboxConfig converts the PodSpec parameters into CRI parameters
  2. Create the log directory the containers will write to
  3. Look up the runtime handler matching the specified RuntimeClass
  4. RunPodSandbox creates the actual container: internally it calls the runtime client's RunPodSandbox, which reaches the underlying CRI implementation over gRPC

// createPodSandbox creates a pod sandbox and returns (podSandBoxID, message, error).
func (m *kubeGenericRuntimeManager) createPodSandbox(pod *v1.Pod, attempt uint32) (string, string, error) {
	podSandboxConfig, err := m.generatePodSandboxConfig(pod, attempt)
	if err != nil {
		message := fmt.Sprintf("Failed to generate sandbox config for pod %q: %v", format.Pod(pod), err)
		klog.ErrorS(err, "Failed to generate sandbox config for pod", "pod", klog.KObj(pod))
		return "", message, err
	}

	// Create pod logs directory
	err = m.osInterface.MkdirAll(podSandboxConfig.LogDirectory, 0755)
	if err != nil {
		message := fmt.Sprintf("Failed to create log directory for pod %q: %v", format.Pod(pod), err)
		klog.ErrorS(err, "Failed to create log directory for pod", "pod", klog.KObj(pod))
		return "", message, err
	}

	runtimeHandler := ""
	if m.runtimeClassManager != nil {
		runtimeHandler, err = m.runtimeClassManager.LookupRuntimeHandler(pod.Spec.RuntimeClassName)
		if err != nil {
			message := fmt.Sprintf("Failed to create sandbox for pod %q: %v", format.Pod(pod), err)
			return "", message, err
		}
		if runtimeHandler != "" {
			klog.V(2).InfoS("Running pod with runtime handler", "pod", klog.KObj(pod), "runtimeHandler", runtimeHandler)
		}
	}

	podSandBoxID, err := m.runtimeService.RunPodSandbox(podSandboxConfig, runtimeHandler)
	if err != nil {
		message := fmt.Sprintf("Failed to create sandbox for pod %q: %v", format.Pod(pod), err)
		klog.ErrorS(err, "Failed to create sandbox for pod", "pod", klog.KObj(pod))
		return "", message, err
	}

	return podSandBoxID, "", nil
}

Generating the Pause Container's Configuration

The parameters declared in the PodSpec are ultimately converted into parameters for the underlying CRI: kubeGenericRuntimeManager.generatePodSandboxConfig turns the Pod into a runtimeapi.PodSandboxConfig.
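
The full generatePodSandboxConfig also fills in DNS config, port mappings, and Linux-specific cgroup/security fields; below is a minimal, self-contained sketch of just the core field mapping. The function name sketchPodSandboxConfig is hypothetical, and the real code derives Labels/Annotations through kubelet-specific helpers (newPodLabels/newPodAnnotations) rather than copying them verbatim.

package kuberuntime

import (
	"path/filepath"

	v1 "k8s.io/api/core/v1"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
)

// sketchPodSandboxConfig maps the core PodSpec fields onto the CRI sandbox config.
func sketchPodSandboxConfig(pod *v1.Pod, attempt uint32) *runtimeapi.PodSandboxConfig {
	config := &runtimeapi.PodSandboxConfig{
		Metadata: &runtimeapi.PodSandboxMetadata{
			Name:      pod.Name,
			Namespace: pod.Namespace,
			Uid:       string(pod.UID),
			Attempt:   attempt,
		},
		// Kubelet convention: /var/log/pods/<namespace>_<name>_<uid>
		LogDirectory: filepath.Join("/var/log/pods",
			pod.Namespace+"_"+pod.Name+"_"+string(pod.UID)),
		// The real code builds these via newPodLabels/newPodAnnotations.
		Labels:      pod.Labels,
		Annotations: pod.Annotations,
	}
	// Hostname is only meaningful for non-host-network pods.
	if !pod.Spec.HostNetwork && pod.Spec.Hostname != "" {
		config.Hostname = pod.Spec.Hostname
	}
	return config
}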

How Containerd Starts the Pause Container

Kubernetes removed dockershim in 1.24 and switched to containerd.

Containerd starts the pause container roughly as follows (a stubbed outline follows the list):

  1. Obtain the configuration: the runtimeapi.PodSandboxConfig converted from the PodSpec
  2. Perform pre-processing based on that configuration
  3. Call client.SandboxStore().Create to create the sandbox object
  4. Create the network namespace
  5. Set up the container's network environment, internally invoking the CNI plugins
  6. Create a Task to prepare the execution environment, internally calling the low-level runtime (runc by default)
  7. Start the Task, which internally has runc start it
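
Condensed into code shape, the ordering looks like this. Every identifier below is a stub invented for this outline, not containerd's real API; the real logic lives in RunPodSandbox in containerd's CRI plugin (pkg/cri/server/sandbox_run.go).

package sketch

import "context"

type sandbox struct{}

type task struct{}

func (t *task) Start(ctx context.Context) error { return nil }

// Stub helpers, named only to mirror the numbered steps above.
func ensurePauseImage(ctx context.Context) string                   { return "pause" }
func sandboxStoreCreate(ctx context.Context, image string) *sandbox { return &sandbox{} }
func newNetNS() string                                              { return "/var/run/netns/cni-xxxx" }
func setupPodNetwork(ctx context.Context, sb *sandbox, ns string)   {}
func newTask(ctx context.Context, sb *sandbox) *task                { return &task{} }

func runPodSandboxOutline(ctx context.Context) error {
	image := ensurePauseImage(ctx)       // 1-2. load config, pre-process (pull pause image if needed)
	sb := sandboxStoreCreate(ctx, image) // 3. register the sandbox object in the sandbox store
	ns := newNetNS()                     // 4. create a persistent network namespace
	setupPodNetwork(ctx, sb, ns)         // 5. wire it up via the CNI plugins
	t := newTask(ctx, sb)                // 6. create a task through the runtime (runc by default)
	return t.Start(ctx)                  // 7. start the task; pause becomes the sandbox's init process
}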

Creating the Network Namespace

Default path: /var/run/netns

Call chain: NewNetNS -> newNS

The actual logic of newNS:

  • Create the /var/run/netns directory as a shared mount point for mounting network namespaces
  • Create an empty file in that directory, with a randomly generated file name

containerd's implementation:

// Some of the following functions are migrated from
// https://github.com/containernetworking/plugins/blob/main/pkg/testutils/netns_linux.go

// newNS creates a new persistent (bind-mounted) network namespace and returns the
// path to the network namespace.
// If pid is not 0, returns the netns from that pid persistently mounted. Otherwise,
// a new netns is created.
func newNS(baseDir string, pid uint32) (nsPath string, err error) {
	b := make([]byte, 16)

	_, err = rand.Read(b)
	if err != nil {
		return "", fmt.Errorf("failed to generate random netns name: %w", err)
	}

	// Create the directory for mounting network namespaces
	// This needs to be a shared mountpoint in case it is mounted in to
	// other namespaces (containers)
	if err := os.MkdirAll(baseDir, 0755); err != nil {
		return "", err
	}

	// create an empty file at the mount point and fail if it already exists
	nsName := fmt.Sprintf("cni-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])
	nsPath = path.Join(baseDir, nsName)
	mountPointFd, err := os.OpenFile(nsPath, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
	if err != nil {
		return "", err
	}
	mountPointFd.Close()

	defer func() {
		// Ensure the mount point is cleaned up on errors
		if err != nil {
			os.RemoveAll(nsPath)
		}
	}()

	if pid != 0 {
		procNsPath := getNetNSPathFromPID(pid)
		// bind mount the netns onto the mount point. This causes the namespace
		// to persist, even when there are no threads in the ns.
		if err = unix.Mount(procNsPath, nsPath, "none", unix.MS_BIND, ""); err != nil {
			return "", fmt.Errorf("failed to bind mount ns src: %v at %s: %w", procNsPath, nsPath, err)
		}
		return nsPath, nil
	}

	var wg sync.WaitGroup
	wg.Add(1)

	// do namespace work in a dedicated goroutine, so that we can safely
	// Lock/Unlock OSThread without upsetting the lock/unlock state of
	// the caller of this function
	go (func() {
		defer wg.Done()
		runtime.LockOSThread()
		// Don't unlock. By not unlocking, golang will kill the OS thread when the
		// goroutine is done (for go1.10+)

		var origNS cnins.NetNS
		origNS, err = cnins.GetNS(getCurrentThreadNetNSPath())
		if err != nil {
			return
		}
		defer origNS.Close()

		// create a new netns on the current thread
		err = unix.Unshare(unix.CLONE_NEWNET)
		if err != nil {
			return
		}

		// Put this thread back to the orig ns, since it might get reused (pre go1.10)
		defer origNS.Set()

		// bind mount the netns from the current thread (from /proc) onto the
		// mount point. This causes the namespace to persist, even when there
		// are no threads in the ns.
		err = unix.Mount(getCurrentThreadNetNSPath(), nsPath, "none", unix.MS_BIND, "")
		if err != nil {
			err = fmt.Errorf("failed to bind mount ns at %s: %w", nsPath, err)
		}
	})()
	wg.Wait()

	if err != nil {
		return "", fmt.Errorf("failed to create namespace: %w", err)
	}

	return nsPath, nil
}

Generating Container Parameters from the PodSpec

Fields in the PodSpec ultimately generate the container configuration that serves as the container's startup parameters, mainly through generateContainerConfig.


// generateContainerConfig generates container config for kubelet runtime v1.
func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Container, pod *v1.Pod, restartCount int, podIP, imageRef string, podIPs []string, nsTarget *kubecontainer.ContainerID) (*runtimeapi.ContainerConfig, func(), error) {
	opts, cleanupAction, err := m.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP, podIPs)
	if err != nil {
		return nil, nil, err
	}
	uid, username, err := m.getImageUser(container.Image)
	if err != nil {
		return nil, cleanupAction, err
	}

	// Verify RunAsNonRoot. Non-root verification only supports numeric user.
	if err := verifyRunAsNonRoot(pod, container, uid, username); err != nil {
		return nil, cleanupAction, err
	}

	command, args := kubecontainer.ExpandContainerCommandAndArgs(container, opts.Envs)
	logDir := BuildContainerLogsDirectory(pod.Namespace, pod.Name, pod.UID, container.Name)
	err = m.osInterface.MkdirAll(logDir, 0755)
	if err != nil {
		return nil, cleanupAction, fmt.Errorf("create container log directory for container %s failed: %v", container.Name, err)
	}
	containerLogsPath := buildContainerLogsPath(container.Name, restartCount)
	restartCountUint32 := uint32(restartCount)
	config := &runtimeapi.ContainerConfig{
		Metadata: &runtimeapi.ContainerMetadata{
			Name:    container.Name,
			Attempt: restartCountUint32,
		},
		Image:       &runtimeapi.ImageSpec{Image: imageRef},
		Command:     command,
		Args:        args,
		WorkingDir:  container.WorkingDir,
		Labels:      newContainerLabels(container, pod),
		Annotations: newContainerAnnotations(container, pod, restartCount, opts),
		Devices:     makeDevices(opts),
		Mounts:      m.makeMounts(opts, container),
		LogPath:     containerLogsPath,
		Stdin:       container.Stdin,
		StdinOnce:   container.StdinOnce,
		Tty:         container.TTY,
	}

	// set platform specific configurations.
	if err := m.applyPlatformSpecificContainerConfig(config, container, pod, uid, username, nsTarget); err != nil {
		return nil, cleanupAction, err
	}

	// set environment variables
	envs := make([]*runtimeapi.KeyValue, len(opts.Envs))
	for idx := range opts.Envs {
		e := opts.Envs[idx]
		envs[idx] = &runtimeapi.KeyValue{
			Key:   e.Name,
			Value: e.Value,
		}
	}
	config.Envs = envs

	return config, cleanupAction, nil
}

Mapping between PodSpec.Container fields and ContainerConfig fields (an expansion example follows the table):

PodSpec.Container    ContainerConfig
Command + Env        Command
Args + Env           Args
WorkingDir           WorkingDir
Image                Image
Resources            Resources
Overhead             Overhead
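
The Command/Args rows deserve a note: before being handed to the CRI, the kubelet expands $(VAR_NAME) references against the container's environment via kubecontainer.ExpandContainerCommandAndArgs (called in generateContainerConfig above). The toy program below re-implements just that happy path to show the effect; it is an illustration, not the kubelet code.

package main

import (
	"fmt"
	"strings"
)

// expand substitutes $(NAME) references, mimicking the effect of
// kubecontainer.ExpandContainerCommandAndArgs for simple cases
// (the real implementation also handles $$ escaping and unresolved refs).
func expand(args []string, env map[string]string) []string {
	out := make([]string, 0, len(args))
	for _, a := range args {
		for name, val := range env {
			a = strings.ReplaceAll(a, "$("+name+")", val)
		}
		out = append(out, a)
	}
	return out
}

func main() {
	env := map[string]string{"GREETING": "hello"}
	// PodSpec command ["echo", "$(GREETING)"] reaches the CRI as ["echo", "hello"].
	fmt.Println(expand([]string{"echo", "$(GREETING)"}, env))
}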

The Common Container Creation Flow

Ephemeral containers, init containers, and regular containers are all created by the same startContainer func; only the arguments passed in differ.

startContainer does the following (a condensed sketch follows the list):

  • Pull the image with the EnsureImageExists func
  • Convert the Container parameters declared in the PodSpec into the parameters the CRI needs, using the generateContainerConfig func
  • The PreCreateContainer func performs pre-creation processing
  • The CreateContainer func creates the container, internally calling the underlying CRI over gRPC
  • The PreStartContainer func handles work needed before the container starts
  • The StartContainer func is the real start, again calling the underlying CRI implementation over gRPC
  • After the container starts, the PostStart lifecycle hook declared by the Pod is invoked, in either Exec or HTTP form
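
Put together, the skeleton looks roughly like this. It is a condensed paraphrase of kubelet's startContainer (pkg/kubelet/kuberuntime/kuberuntime_container.go): the criRuntime interface is a trimmed stand-in for the CRI services, and the real signatures, hook plumbing, and error messages are all simplified.

package sketch

import "context"

// criRuntime is a trimmed stand-in for the CRI runtime/image services.
type criRuntime interface {
	PullImage(ctx context.Context, image string) (string, error)
	CreateContainer(ctx context.Context, sandboxID string, config, sandboxConfig any) (string, error)
	StartContainer(ctx context.Context, containerID string) error
}

func startContainerSketch(ctx context.Context, rt criRuntime, sandboxID, image string, postStart func() error) error {
	// 1. EnsureImageExists: pull the image if needed (imagePullPolicy, pull secrets).
	imageRef, err := rt.PullImage(ctx, image)
	if err != nil {
		return err
	}
	// 2. generateContainerConfig: PodSpec container -> CRI ContainerConfig;
	//    imageRef ends up in ContainerConfig.Image (see the previous section).
	config := map[string]string{"image": imageRef}
	// 3-4. PreCreateContainer, then CreateContainer over gRPC.
	id, err := rt.CreateContainer(ctx, sandboxID, config, nil)
	if err != nil {
		return err
	}
	// 5-6. PreStartContainer, then StartContainer over gRPC: the actual start.
	if err := rt.StartContainer(ctx, id); err != nil {
		return err
	}
	// 7. PostStart lifecycle hook (Exec or HTTP form).
	if postStart != nil {
		return postStart()
	}
	return nil
}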

Containerd's flow for creating a container:

// CreateContainer creates a new container in the given PodSandbox.
func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateContainerRequest) (_ *runtime.CreateContainerResponse, retErr error) {
	...
	var cntr containerd.Container
	if cntr, err = c.client.NewContainer(ctx, id, opts...); err != nil {
		return nil, fmt.Errorf("failed to create containerd container: %w", err)
	}
	...
}

// NewContainer will create a new container with the provided id.
// The id must be unique within the namespace.
func (c *Client) NewContainer(ctx context.Context, id string, opts ...NewContainerOpts) (Container, error) {
	ctx, done, err := c.WithLease(ctx)
	if err != nil {
		return nil, err
	}
	defer done(ctx)

	container := containers.Container{
		ID: id,
		Runtime: containers.RuntimeInfo{
			Name: c.runtime,
		},
	}
	for _, o := range opts {
		if err := o(ctx, c, &container); err != nil {
			return nil, err
		}
	}
	r, err := c.ContainerService().Create(ctx, container)
	if err != nil {
		return nil, err
	}
	return containerFromRecord(c, r), nil
}

Containerd's flow for starting a container

criService's StartContainer func starts the container:

// StartContainer starts the container.
func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (retRes *runtime.StartContainerResponse, retErr error) {
	...
	// wait is a long running background request, no timeout needed.
	exitCh, err := task.Wait(ctrdutil.NamespacedContext())
	if err != nil {
		return nil, fmt.Errorf("failed to wait for containerd task: %w", err)
	}
	...
	err = c.nri.StartContainer(ctx, &sandbox, &cntr)
	if err != nil {
		log.G(ctx).WithError(err).Errorf("NRI container start failed")
		return nil, fmt.Errorf("NRI container start failed: %w", err)
	}
	...
	// Start containerd task.
	if err := task.Start(ctx); err != nil {
		return nil, fmt.Errorf("failed to start containerd task %q: %w", id, err)
	}
	...
}