K8s核心资源对象-Pod(PodSpec)

基于1.25

什么是Pod

Pod是K8s最小可部署的单元。一个Pod可以有多个容器

// PodSpec is a description of a pod.
// Its fields cover the pod's containers, volumes, scheduling constraints,
// lifecycle settings, hostname/DNS settings, host namespace sharing,
// service account and security context.
type PodSpec struct {
// List of volumes that can be mounted by containers belonging to the pod.
// More info: https://kubernetes.io/docs/concepts/storage/volumes
// +optional
// +patchMergeKey=name
// +patchStrategy=merge,retainKeys
Volumes []Volume `json:"volumes,omitempty" patchStrategy:"merge,retainKeys" patchMergeKey:"name" protobuf:"bytes,1,rep,name=volumes"`
// List of initialization containers belonging to the pod.
// Init containers are executed in order prior to containers being started. If any
// init container fails, the pod is considered to have failed and is handled according
// to its restartPolicy. The name for an init container or normal container must be
// unique among all containers.
// Init containers may not have Lifecycle actions, Readiness probes, Liveness probes, or Startup probes.
// The resourceRequirements of an init container are taken into account during scheduling
// by finding the highest request/limit for each resource type, and then using the max of
// that value or the sum of the normal containers. Limits are applied to init containers
// in a similar fashion.
// Init containers cannot currently be added or removed.
// Cannot be updated.
// More info: https://kubernetes.io/docs/concepts/workloads/pods/init-containers/
// +patchMergeKey=name
// +patchStrategy=merge
InitContainers []Container `json:"initContainers,omitempty" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,20,rep,name=initContainers"`
// List of containers belonging to the pod.
// Containers cannot currently be added or removed.
// There must be at least one container in a Pod.
// Cannot be updated.
// +patchMergeKey=name
// +patchStrategy=merge
Containers []Container `json:"containers" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,2,rep,name=containers"`
// List of ephemeral containers run in this pod. Ephemeral containers may be run in an existing
// pod to perform user-initiated actions such as debugging. This list cannot be specified when
// creating a pod, and it cannot be modified by updating the pod spec. In order to add an
// ephemeral container to an existing pod, use the pod's ephemeralcontainers subresource.
// +optional
// +patchMergeKey=name
// +patchStrategy=merge
EphemeralContainers []EphemeralContainer `json:"ephemeralContainers,omitempty" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,34,rep,name=ephemeralContainers"`
// Restart policy for all containers within the pod.
// One of Always, OnFailure, Never.
// Default to Always.
// More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy
// +optional
RestartPolicy RestartPolicy `json:"restartPolicy,omitempty" protobuf:"bytes,3,opt,name=restartPolicy,casttype=RestartPolicy"`
// Optional duration in seconds the pod needs to terminate gracefully. May be decreased in delete request.
// Value must be non-negative integer. The value zero indicates stop immediately via
// the kill signal (no opportunity to shut down).
// If this value is nil, the default grace period will be used instead.
// The grace period is the duration in seconds after the processes running in the pod are sent
// a termination signal and the time when the processes are forcibly halted with a kill signal.
// Set this value longer than the expected cleanup time for your process.
// Defaults to 30 seconds.
// +optional
TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" protobuf:"varint,4,opt,name=terminationGracePeriodSeconds"`
// Optional duration in seconds the pod may be active on the node relative to
// StartTime before the system will actively try to mark it failed and kill associated containers.
// Value must be a positive integer.
// +optional
ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty" protobuf:"varint,5,opt,name=activeDeadlineSeconds"`
// Set DNS policy for the pod.
// Defaults to "ClusterFirst".
// Valid values are 'ClusterFirstWithHostNet', 'ClusterFirst', 'Default' or 'None'.
// DNS parameters given in DNSConfig will be merged with the policy selected with DNSPolicy.
// To have DNS options set along with hostNetwork, you have to specify DNS policy
// explicitly to 'ClusterFirstWithHostNet'.
// +optional
DNSPolicy DNSPolicy `json:"dnsPolicy,omitempty" protobuf:"bytes,6,opt,name=dnsPolicy,casttype=DNSPolicy"`
// NodeSelector is a selector which must be true for the pod to fit on a node.
// Selector which must match a node's labels for the pod to be scheduled on that node.
// More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
// +optional
// +mapType=atomic
NodeSelector map[string]string `json:"nodeSelector,omitempty" protobuf:"bytes,7,rep,name=nodeSelector"`

// ServiceAccountName is the name of the ServiceAccount to use to run this pod.
// More info: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
// +optional
ServiceAccountName string `json:"serviceAccountName,omitempty" protobuf:"bytes,8,opt,name=serviceAccountName"`
// DeprecatedServiceAccount is a deprecated alias for ServiceAccountName.
// Deprecated: Use serviceAccountName instead.
// +k8s:conversion-gen=false
// +optional
DeprecatedServiceAccount string `json:"serviceAccount,omitempty" protobuf:"bytes,9,opt,name=serviceAccount"`
// AutomountServiceAccountToken indicates whether a service account token should be automatically mounted.
// +optional
AutomountServiceAccountToken *bool `json:"automountServiceAccountToken,omitempty" protobuf:"varint,21,opt,name=automountServiceAccountToken"`

// NodeName is a request to schedule this pod onto a specific node. If it is non-empty,
// the scheduler simply schedules this pod onto that node, assuming that it fits resource
// requirements.
// +optional
NodeName string `json:"nodeName,omitempty" protobuf:"bytes,10,opt,name=nodeName"`
// Host networking requested for this pod. Use the host's network namespace.
// If this option is set, the ports that will be used must be specified.
// Default to false.
// +k8s:conversion-gen=false
// +optional
HostNetwork bool `json:"hostNetwork,omitempty" protobuf:"varint,11,opt,name=hostNetwork"`
// Use the host's pid namespace.
// Optional: Default to false.
// +k8s:conversion-gen=false
// +optional
HostPID bool `json:"hostPID,omitempty" protobuf:"varint,12,opt,name=hostPID"`
// Use the host's ipc namespace.
// Optional: Default to false.
// +k8s:conversion-gen=false
// +optional
HostIPC bool `json:"hostIPC,omitempty" protobuf:"varint,13,opt,name=hostIPC"`
// Share a single process namespace between all of the containers in a pod.
// When this is set containers will be able to view and signal processes from other containers
// in the same pod, and the first process in each container will not be assigned PID 1.
// HostPID and ShareProcessNamespace cannot both be set.
// Optional: Default to false.
// +k8s:conversion-gen=false
// +optional
ShareProcessNamespace *bool `json:"shareProcessNamespace,omitempty" protobuf:"varint,27,opt,name=shareProcessNamespace"`
// SecurityContext holds pod-level security attributes and common container settings.
// Optional: Defaults to empty. See type description for default values of each field.
// +optional
SecurityContext *PodSecurityContext `json:"securityContext,omitempty" protobuf:"bytes,14,opt,name=securityContext"`
// ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the images used by this PodSpec.
// If specified, these secrets will be passed to individual puller implementations for them to use.
// More info: https://kubernetes.io/docs/concepts/containers/images#specifying-imagepullsecrets-on-a-pod
// +optional
// +patchMergeKey=name
// +patchStrategy=merge
ImagePullSecrets []LocalObjectReference `json:"imagePullSecrets,omitempty" patchStrategy:"merge" patchMergeKey:"name" protobuf:"bytes,15,rep,name=imagePullSecrets"`
// Specifies the hostname of the Pod
// If not specified, the pod's hostname will be set to a system-defined value.
// +optional
Hostname string `json:"hostname,omitempty" protobuf:"bytes,16,opt,name=hostname"`
// If specified, the fully qualified Pod hostname will be "<hostname>.<subdomain>.<pod namespace>.svc.<cluster domain>".
// If not specified, the pod will not have a domainname at all.
// +optional
Subdomain string `json:"subdomain,omitempty" protobuf:"bytes,17,opt,name=subdomain"`
// If specified, the pod's scheduling constraints
// +optional
Affinity *Affinity `json:"affinity,omitempty" protobuf:"bytes,18,opt,name=affinity"`
// If specified, the pod will be dispatched by specified scheduler.
// If not specified, the pod will be dispatched by default scheduler.
// +optional
SchedulerName string `json:"schedulerName,omitempty" protobuf:"bytes,19,opt,name=schedulerName"`
// If specified, the pod's tolerations.
// +optional
Tolerations []Toleration `json:"tolerations,omitempty" protobuf:"bytes,22,opt,name=tolerations"`
// HostAliases is an optional list of hosts and IPs that will be injected into the pod's hosts
// file if specified. This is only valid for non-hostNetwork pods.
// +optional
// +patchMergeKey=ip
// +patchStrategy=merge
HostAliases []HostAlias `json:"hostAliases,omitempty" patchStrategy:"merge" patchMergeKey:"ip" protobuf:"bytes,23,rep,name=hostAliases"`
// If specified, indicates the pod's priority. "system-node-critical" and
// "system-cluster-critical" are two special keywords which indicate the
// highest priorities with the former being the highest priority. Any other
// name must be defined by creating a PriorityClass object with that name.
// If not specified, the pod priority will be default or zero if there is no
// default.
// +optional
PriorityClassName string `json:"priorityClassName,omitempty" protobuf:"bytes,24,opt,name=priorityClassName"`
// The priority value. Various system components use this field to find the
// priority of the pod. When Priority Admission Controller is enabled, it
// prevents users from setting this field. The admission controller populates
// this field from PriorityClassName.
// The higher the value, the higher the priority.
// +optional
Priority *int32 `json:"priority,omitempty" protobuf:"bytes,25,opt,name=priority"`
// Specifies the DNS parameters of a pod.
// Parameters specified here will be merged to the generated DNS
// configuration based on DNSPolicy.
// +optional
DNSConfig *PodDNSConfig `json:"dnsConfig,omitempty" protobuf:"bytes,26,opt,name=dnsConfig"`
// If specified, all readiness gates will be evaluated for pod readiness.
// A pod is ready when all its containers are ready AND
// all conditions specified in the readiness gates have status equal to "True"
// More info: https://git.k8s.io/enhancements/keps/sig-network/580-pod-readiness-gates
// +optional
ReadinessGates []PodReadinessGate `json:"readinessGates,omitempty" protobuf:"bytes,28,opt,name=readinessGates"`
// RuntimeClassName refers to a RuntimeClass object in the node.k8s.io group, which should be used
// to run this pod. If no RuntimeClass resource matches the named class, the pod will not be run.
// If unset or empty, the "legacy" RuntimeClass will be used, which is an implicit class with an
// empty definition that uses the default runtime handler.
// More info: https://git.k8s.io/enhancements/keps/sig-node/585-runtime-class
// +optional
RuntimeClassName *string `json:"runtimeClassName,omitempty" protobuf:"bytes,29,opt,name=runtimeClassName"`
// EnableServiceLinks indicates whether information about services should be injected into pod's
// environment variables, matching the syntax of Docker links.
// Optional: Defaults to true.
// +optional
EnableServiceLinks *bool `json:"enableServiceLinks,omitempty" protobuf:"varint,30,opt,name=enableServiceLinks"`
// PreemptionPolicy is the Policy for preempting pods with lower priority.
// One of Never, PreemptLowerPriority.
// Defaults to PreemptLowerPriority if unset.
// +optional
PreemptionPolicy *PreemptionPolicy `json:"preemptionPolicy,omitempty" protobuf:"bytes,31,opt,name=preemptionPolicy"`
// Overhead represents the resource overhead associated with running a pod for a given RuntimeClass.
// This field will be autopopulated at admission time by the RuntimeClass admission controller. If
// the RuntimeClass admission controller is enabled, overhead must not be set in Pod create requests.
// The RuntimeClass admission controller will reject Pod create requests which have the overhead already
// set. If RuntimeClass is configured and selected in the PodSpec, Overhead will be set to the value
// defined in the corresponding RuntimeClass, otherwise it will remain unset and treated as zero.
// More info: https://git.k8s.io/enhancements/keps/sig-node/688-pod-overhead/README.md
// +optional
Overhead ResourceList `json:"overhead,omitempty" protobuf:"bytes,32,opt,name=overhead"`
// TopologySpreadConstraints describes how a group of pods ought to spread across topology
// domains. Scheduler will schedule pods in a way which abides by the constraints.
// All topologySpreadConstraints are ANDed.
// +optional
// +patchMergeKey=topologyKey
// +patchStrategy=merge
// +listType=map
// +listMapKey=topologyKey
// +listMapKey=whenUnsatisfiable
TopologySpreadConstraints []TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty" patchStrategy:"merge" patchMergeKey:"topologyKey" protobuf:"bytes,33,opt,name=topologySpreadConstraints"`
// If true the pod's hostname will be configured as the pod's FQDN, rather than the leaf name (the default).
// In Linux containers, this means setting the FQDN in the hostname field of the kernel (the nodename field of struct utsname).
// In Windows containers, this means setting the registry value of hostname for the registry key HKEY_LOCAL_MACHINE\\SYSTEM\\CurrentControlSet\\Services\\Tcpip\\Parameters to FQDN.
// If a pod does not have FQDN, this has no effect.
// Default to false.
// +optional
SetHostnameAsFQDN *bool `json:"setHostnameAsFQDN,omitempty" protobuf:"varint,35,opt,name=setHostnameAsFQDN"`
// Specifies the OS of the containers in the pod.
// Some pod and container fields are restricted if this is set.
//
// If the OS field is set to linux, the following fields must be unset:
// - securityContext.windowsOptions
//
// If the OS field is set to windows, the following fields must be unset:
// - spec.hostPID
// - spec.hostIPC
// - spec.hostUsers
// - spec.securityContext.seLinuxOptions
// - spec.securityContext.seccompProfile
// - spec.securityContext.fsGroup
// - spec.securityContext.fsGroupChangePolicy
// - spec.securityContext.sysctls
// - spec.shareProcessNamespace
// - spec.securityContext.runAsUser
// - spec.securityContext.runAsGroup
// - spec.securityContext.supplementalGroups
// - spec.containers[*].securityContext.seLinuxOptions
// - spec.containers[*].securityContext.seccompProfile
// - spec.containers[*].securityContext.capabilities
// - spec.containers[*].securityContext.readOnlyRootFilesystem
// - spec.containers[*].securityContext.privileged
// - spec.containers[*].securityContext.allowPrivilegeEscalation
// - spec.containers[*].securityContext.procMount
// - spec.containers[*].securityContext.runAsUser
// - spec.containers[*].securityContext.runAsGroup
// +optional
OS *PodOS `json:"os,omitempty" protobuf:"bytes,36,opt,name=os"`
// Use the host's user namespace.
// Optional: Default to true.
// If set to true or not present, the pod will be run in the host user namespace, useful
// for when the pod needs a feature only available to the host user namespace, such as
// loading a kernel module with CAP_SYS_MODULE.
// When set to false, a new userns is created for the pod. Setting false is useful for
// mitigating container breakout vulnerabilities even allowing users to run their
// containers as root without actually having root privileges on the host.
// This field is alpha-level and is only honored by servers that enable the UserNamespacesSupport feature.
// +k8s:conversion-gen=false
// +optional
HostUsers *bool `json:"hostUsers,omitempty" protobuf:"bytes,37,opt,name=hostUsers"`
}

主要分为几类:

  • 容器相关:InitContainers、Containers、EphemeralContainers、ImagePullSecrets、EnableServiceLinks、OS
  • 调度相关:NodeSelector、NodeName、Affinity、Tolerations、SchedulerName、RuntimeClassName、Overhead、PriorityClassName、Priority、PreemptionPolicy、TopologySpreadConstraints
  • 存储相关:Volumes
  • Pod生命周期相关:RestartPolicy、TerminationGracePeriodSeconds、ActiveDeadlineSeconds、ReadinessGates
  • 主机名和DNS相关字段:Hostname、SetHostnameAsFQDN、Subdomain、HostAliases、DNSConfig、DNSPolicy
  • 主机Namespace相关字段:HostNetwork、HostPID、HostIPC、ShareProcessNamespace、HostUsers(alpha阶段)
  • ServiceAccount相关字段:ServiceAccountName、AutomountServiceAccountToken、ServiceAccount(弃用)

容器相关字段

Pod主要有三类容器:Init容器(InitContainer)、普通容器(Containers)、临时容器(EphemeralContainers)

InitContainers

  • Init容器是一个特殊的容器,在普通容器启动之前顺序执行,如果任何Init容器失败,则该Pod被视为失败,并根据RestartPolicy进行处理
  • Init拥有应用容器的特性,但是不支持lifecycle、livenessProbe、readinessProbe、startupProbe
  • Init按照顺序执行

Containers

  • Pod中至少有一个是普通容器
  • 如果有多个容器,其中一个是主容器,其他容器称为Sidecar容器,辅助主容器完成功能

EphemeralContainers

ImagePullSecrets

  • 引用同一命名空间中的Secret,用于在拉取该Pod所用的任何镜像时提供镜像仓库的账号信息
// Make the environment variables for a pod in the given namespace.
func (kl *Kubelet) makeEnvironmentVariables(pod *v1.Pod, container *v1.Container, podIP string, podIPs []string) ([]kubecontainer.EnvVar, error) {
// 如果EnableServiceLinks=nil,无法生成环境变量,直接返回错误
if pod.Spec.EnableServiceLinks == nil {
return nil, fmt.Errorf("nil pod.spec.enableServiceLinks encountered, cannot construct envvars")
}

OS

  • 指定Pod的操作系统,当前支持linux和windows
  • 如果设置了此属性,则不适用于该操作系统的Pod和容器字段会受到限制(必须保持未设置)

调度相关字段

NodeSelector

  • 基于NodeSelector通过K8s的label-selector机制选择调度到某个节点上

NodeName

Pod运行的节点信息,一般调度成功之后,调度器设置

Affinity

一组节点亲和调度规则

  • NodeAffinity:Pod的节点调度规则
  • PodAffinity: Pod的亲和性调度规则。比如Pod和一些Pod放在一起
  • PodAntiAffinity:Pod反亲和调度规则。比如避免Pod放在一起

Tolerations

容忍污点。为了让某些Pod不被调度到某些节点,可以在节点上设置污点(Taint);Pod通过Tolerations声明自己可以容忍哪些污点

  • 污点的effect为NoExecute时,不能容忍该污点的Pod会被马上驱逐
  • 如果容忍度设置了可选的TolerationSeconds,Pod会继续运行这段时间,之后被驱逐

RuntimeClassName、Overhead

  • RuntimeClassName:引用node.k8s.io组中的RuntimeClass对象,该RuntimeClass对象将被用来运行这个Pod。RuntimeClass提供了一种在集群中选择不同容器运行时配置的机制。如果没有与该名称匹配的RuntimeClass,Pod不会运行。不设置或者为空时,使用"legacy" RuntimeClass(一个定义为空、使用默认运行时处理程序的隐式类)
  • Overhead:指定RuntimeClass对象运行Pod的资源开销

PriorityClassName、Priority

用于设置Pod优先级

// admitPod enforces the priority rules for a pod admission request.
//
// On update it carries the previously stored Priority and PreemptionPolicy
// over to the incoming pod when the incoming pod leaves them unset. On create
// it resolves the priority (and preemption policy) from the pod's
// PriorityClassName, or from the default priority class when no name is given,
// and rejects the request if the pod spec already carries a conflicting value.
// Any other operation is admitted unchanged.
func (p *Plugin) admitPod(a admission.Attributes) error {
	op := a.GetOperation()
	pod, isPod := a.GetObject().(*core.Pod)
	if !isPod {
		return errors.NewBadRequest("resource was marked with kind Pod but was unable to be converted")
	}

	if op == admission.Update {
		previous, isPod := a.GetOldObject().(*core.Pod)
		if !isPod {
			return errors.NewBadRequest("resource was marked with kind Pod but was unable to be converted")
		}

		// The values were populated by this plugin on create; keep them when the
		// update omits them. API validation prevents mutations to Priority and
		// PriorityClassName, so any other change fails update validation and is
		// never persisted.
		if pod.Spec.Priority == nil && previous.Spec.Priority != nil {
			pod.Spec.Priority = previous.Spec.Priority
		}
		if pod.Spec.PreemptionPolicy == nil && previous.Spec.PreemptionPolicy != nil {
			pod.Spec.PreemptionPolicy = previous.Spec.PreemptionPolicy
		}
		return nil
	}

	if op != admission.Create {
		return nil
	}

	var (
		priority         int32
		preemptionPolicy *apiv1.PreemptionPolicy
	)
	if className := pod.Spec.PriorityClassName; len(className) != 0 {
		// A class name was supplied: look it up and take its value and policy.
		pc, err := p.lister.Get(className)
		if err != nil {
			if errors.IsNotFound(err) {
				return admission.NewForbidden(a, fmt.Errorf("no PriorityClass with name %v was found", className))
			}

			return fmt.Errorf("failed to get PriorityClass with name %s: %v", className, err)
		}
		priority, preemptionPolicy = pc.Value, pc.PreemptionPolicy
	} else {
		// No class name supplied: fall back to the default priority class and
		// record its name on the pod.
		defaultName, defaultPriority, defaultPolicy, err := p.getDefaultPriority()
		if err != nil {
			return fmt.Errorf("failed to get default priority class: %v", err)
		}
		pod.Spec.PriorityClassName = defaultName
		priority, preemptionPolicy = defaultPriority, defaultPolicy
	}

	// Reject the pod only when it carries an explicit priority that differs from
	// the one computed from the priority class; a matching value is accepted.
	if requested := pod.Spec.Priority; requested != nil && *requested != priority {
		return admission.NewForbidden(a, fmt.Errorf("the integer value of priority (%d) must not be provided in pod spec; priority admission controller computed %d from the given PriorityClass name", *requested, priority))
	}
	pod.Spec.Priority = &priority

	// Without a policy on the priority class, the pod's own setting is left as is.
	if preemptionPolicy == nil {
		return nil
	}
	corePolicy := core.PreemptionPolicy(*preemptionPolicy)
	if requested := pod.Spec.PreemptionPolicy; requested != nil && *requested != corePolicy {
		return admission.NewForbidden(a, fmt.Errorf("the string value of PreemptionPolicy (%s) must not be provided in pod spec; priority admission controller computed %s from the given PriorityClass name", *requested, corePolicy))
	}
	pod.Spec.PreemptionPolicy = &corePolicy
	return nil
}

PreemptionPolicy

用于抢占优先级低的Pod策略,有Never(不抢占)、PreemptLowerPriority(抢占优先级低的)

// PodEligibleToPreemptOthers reports whether pod may preempt other pods, and
// returns a human-readable reason when it may not.
//
// A pod is not eligible when its PreemptionPolicy is Never, or when it already
// has a nominated node on which a lower-priority pod is terminating. A pod
// whose nominated node was judged UnschedulableAndUnresolvable is eligible again.
func (pl *DefaultPreemption) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNodeStatus *framework.Status) (bool, string) {
	// An explicit preemptionPolicy of Never opts the pod out of preemption.
	if policy := pod.Spec.PreemptionPolicy; policy != nil && *policy == v1.PreemptNever {
		return false, fmt.Sprint("not eligible due to preemptionPolicy=Never.")
	}

	nodeInfos := pl.fh.SnapshotSharedLister().NodeInfos()
	nominated := pod.Status.NominatedNodeName
	if len(nominated) == 0 {
		return true, ""
	}

	// If the filters consider the nominated node UnschedulableAndUnresolvable,
	// the pod should be considered for preempting again.
	if nominatedNodeStatus.Code() == framework.UnschedulableAndUnresolvable {
		return true, ""
	}

	// The lookup error is deliberately ignored: a nil nodeInfo simply means
	// there is nothing on the nominated node to inspect.
	nodeInfo, _ := nodeInfos.Get(nominated)
	if nodeInfo == nil {
		return true, ""
	}

	preemptorPriority := corev1helpers.PodPriority(pod)
	for _, candidate := range nodeInfo.Pods {
		if candidate.Pod.DeletionTimestamp == nil {
			continue
		}
		if corev1helpers.PodPriority(candidate.Pod) < preemptorPriority {
			// There is a terminating lower-priority pod on the nominated node.
			return false, fmt.Sprint("not eligible due to a terminating pod on the nominated node.")
		}
	}
	return true, ""
}

TopologySpreadConstraints

描述一组Pod如何跨拓扑分布。

存储相关字段

主要通过设置Volumes字段来实现,设置Pod挂载的列表

Pod的生命周期

RestartPolicy

重启策略:

  • Always(default):总是重启,以指数退避的方式延迟重启(10s、20s……),最长5min;如果容器正常运行10min没有问题,退避时间重置为0
  • OnFailure:失败的时候重启
  • Never:从不重启

TerminationGracePeriodSeconds

Pod优雅终止的宽限时间。收到停止信号,存活时间。默认30s;

ActiveDeadlineSeconds

ActiveDeadlineSeconds是Pod相对于StartTime可以在节点上保持活跃的时长(秒),超过之后系统会主动将其标记为失败并终止相关容器,一般跟Job一起使用。

ReadinessGate

ReadinessGate是Pod就绪门控:只有当所有容器都已就绪,并且所有门控中指定的状况(condition)状态都是True时,Pod才算就绪,此时Pod的IP地址才会加入Service的端点(Endpoints)

主机名和DNS相关字段

Hostname

  • 未指定时,主机名默认取Pod的metadata.name
  • Hostname可以指定Pod的主机名
  • 设置了这个字段之后,它优先于Pod名称成为主机名

SetHostnameAsFQDN

启用此选项后,Pod的主机名会被设置为它的FQDN(而不是默认的短主机名);如果Pod没有FQDN,则此选项无效

// GetNodenameForKernel returns the value to set in the kernel hostname field
// (the nodename field of struct utsname) of the pod.
//
// The plain hostname is returned unless setHostnameAsFQDN points to true and
// hostDomainName is non-empty; in that case "<hostname>.<hostDomainName>" is
// returned. An error (with an empty string) is returned when that FQDN is
// longer than 64 characters.
func GetNodenameForKernel(hostname string, hostDomainName string, setHostnameAsFQDN *bool) (string, error) {
	// The Linux nodename kernel field holds 64 characters plus the null terminator.
	const fqdnMaxLen = 64

	wantFQDN := setHostnameAsFQDN != nil && *setHostnameAsFQDN
	if len(hostDomainName) == 0 || !wantFQDN {
		return hostname, nil
	}

	fqdn := hostname + "." + hostDomainName
	if len(fqdn) > fqdnMaxLen {
		return "", fmt.Errorf("failed to construct FQDN from pod hostname and cluster domain, FQDN %s is too long (%d characters is the max, %d characters requested)", fqdn, fqdnMaxLen, len(fqdn))
	}
	return fqdn, nil
}

Subdomain

指定Pod的子域名

// GeneratePodHostNameAndDomain derives the hostname and domain name for a pod
// from its spec, or returns an error.
//
// The hostname is pod.Spec.Hostname when set (it must be a valid DNS-1123
// label), otherwise the pod name; either way it is passed through
// truncatePodHostnameIfNeeded. The domain is empty unless pod.Spec.Subdomain is
// set (also a DNS-1123 label), in which case it is
// "<subdomain>.<namespace>.svc.<cluster domain>".
func (kl *Kubelet) GeneratePodHostNameAndDomain(pod *v1.Pod) (string, string, error) {
	clusterDomain := kl.dnsConfigurer.ClusterDomain

	// Prefer an explicitly requested hostname over the pod name.
	candidate := pod.Name
	if custom := pod.Spec.Hostname; len(custom) > 0 {
		if problems := utilvalidation.IsDNS1123Label(custom); len(problems) != 0 {
			return "", "", fmt.Errorf("pod Hostname %q is not a valid DNS label: %s", custom, strings.Join(problems, ";"))
		}
		candidate = custom
	}

	// Per the original annotation, hostnames longer than 63 characters are
	// truncated at this step.
	hostname, err := truncatePodHostnameIfNeeded(pod.Name, candidate)
	if err != nil {
		return "", "", err
	}

	subdomain := pod.Spec.Subdomain
	if len(subdomain) == 0 {
		return hostname, "", nil
	}
	if problems := utilvalidation.IsDNS1123Label(subdomain); len(problems) != 0 {
		return "", "", fmt.Errorf("pod Subdomain %q is not a valid DNS label: %s", subdomain, strings.Join(problems, ";"))
	}

	// hostDomain = subdomain + namespace + "svc" + cluster domain.
	hostDomain := fmt.Sprintf("%s.%s.svc.%s", subdomain, pod.Namespace, clusterDomain)
	return hostname, hostDomain, nil
}

HostAliases

HostAliases是一个可选的主机名和IP列表,指定后会被注入到Pod的hosts文件中,仅对非hostNetwork的Pod有效

// makeMounts determines the mount points for the given container.
func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, hostDomain string, podIPs []string, podVolumes kubecontainer.VolumeMap, hu hostutil.HostUtils, subpather subpath.Interface, expandEnvs []kubecontainer.EnvVar) ([]kubecontainer.Mount, func(), error) {
...
if mountEtcHostsFile {
// 设置 HostAliases
hostAliases := pod.Spec.HostAliases
hostsMount, err := makeHostsMount(podDir, podIPs, hostName, hostDomain, hostAliases, pod.Spec.HostNetwork)
if err != nil {
return nil, cleanupAction, err
}
mounts = append(mounts, *hostsMount)
}
return mounts, cleanupAction, nil
...

DNSConfig、DNSPolicy

主要用于控制Pod的DNS

  • DNSConfig是可选的,可以搭配任何DNSPolicy

  • DNSPolicy为None的时候,必须指定DNSConfig

  • DNSPolicy有以下几种

    • ClusterFirstWithHostNet:以hostNetwork方式运行的Pod需要显式声明此策略,否则会回退到Default策略
    • ClusterFirst(default):与集群域名不匹配的,都转发上游服务器
    • Default:Pod从运行节点继承DNS解析配置
    • None:忽略K8s的DNS设置,使用DNSConfig中的配置
  • DNSConfig有以下几种

    • nameservers: 用于Pod的DNS服务器IP列表,最多三个
    • searches: 用于Pod中查找主机名的DNS搜索域
    • options: 可选对象列表,每个对象有name(必须)和value(可选)

namespace相关字段

HostNetwork

主要用于是否使用主机网络命名空间,默认false

HostPID

使用主机的PID命名空间,默认false

HostIPC

使用主机IPC命名空间,默认false

ShareProcessNamespace

是否启用PID命名空间共享,默认false

  • 设置后,同一Pod下的容器可以查看其他容器的进程并向其发送信号,且每个容器的第一个进程不再是PID 1
  • 不能同时设置HostPID和ShareProcessNamespace

HostUsers

使用主机的用户命名空间,默认true

  • 1.25 alpha阶段,需要启用UserNamespacesSupport特性才能使用
  • 设置为false时会为Pod创建新的用户命名空间,可以缓解容器逃逸漏洞,甚至允许用户在主机上没有root权限的情况下以root身份运行容器

ServiceAccount字段

ServiceAccountName

运行此Pod所使用的ServiceAccount的名称

AutomountServiceAccountToken

是否自动挂载服务账号的令牌

ServiceAccount(弃用)

已弃用,由ServiceAccountName替代

上下文相关

SecurityContext包含Pod级别的安全属性和常见的容器设置,可选字段,默认空

  • runAsUser:运行容器入口点的UID。未指定,默认镜像元数据中的指定的用户;spec.os.name=windows不能设置此字段
  • runAsNonRoot:容器必须以非root用户运行。如果是true,kubelet在运行时验证镜像,确保容器不以UID 0(root)的身份运行,否则拒绝启动容器
  • runAsGroup:运行容器进程的入口点GID。未指定,使用默认值;spec.os.name=windows不能设置此字段
  • supplementalGroups:在容器的主GID之外,应用于每个容器中运行的第一个进程的附加组列表。未指定,不会增加额外组列表;spec.os.name=windows不能设置此字段
  • securityContext.sysctls:包含用于Pod命名空间的sysctl列表。如果包含容器运行时不支持的sysctl,Pod可能无法启动;spec.os.name=windows不能设置此字段