K8s发现和负载均衡-Endpoints和EndpointSlice

基于1.25

什么是EndPoints

EndPoints定义了网络端点的列表

  • K8s限制单个Endpoints对象容纳的端点数量:超过1000个端点时会被截断(见下文的over-capacity注解)

Endpoints字段

// Endpoints is a collection of endpoints that implement the actual service. Example:
//
//   Name: "mysvc",
//   Subsets: [
//     {
//       Addresses: [{"ip": "10.10.1.1"}, {"ip": "10.10.2.2"}],
//       Ports: [{"name": "a", "port": 8675}, {"name": "b", "port": 309}]
//     },
//     {
//       Addresses: [{"ip": "10.10.3.3"}],
//       Ports: [{"name": "a", "port": 93}, {"name": "b", "port": 76}]
//     },
//   ]
//
// The object itself carries only type/object metadata plus Subsets; every
// address and port is reached through the Subsets field.
type Endpoints struct {
metav1.TypeMeta `json:",inline"`
// Standard object's metadata.
// More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata
// +optional
metav1.ObjectMeta `json:"metadata,omitempty" protobuf:"bytes,1,opt,name=metadata"`

// Sets of addresses and ports that comprise a service.
//
// The set of all endpoints is the union of all subsets. Addresses are placed into
// subsets according to the IPs they share. A single address with multiple ports,
// some of which are ready and some of which are not (because they come from
// different containers) will result in the address being displayed in different
// subsets for the different ports. No address will appear in both Addresses and
// NotReadyAddresses in the same subset.
// +optional
Subsets []EndpointSubset `json:"subsets,omitempty" protobuf:"bytes,2,rep,name=subsets"`
}
// EndpointSubset is a group of addresses with a common set of ports. The
// expanded set of endpoints is the Cartesian product of Addresses x Ports.
// For example, given:
//
//   {
//     Addresses: [{"ip": "10.10.1.1"}, {"ip": "10.10.2.2"}],
//     Ports: [{"name": "a", "port": 8675}, {"name": "b", "port": 309}]
//   }
//
// The resulting set of endpoints can be viewed as:
//
//   a: [ 10.10.1.1:8675, 10.10.2.2:8675 ],
//   b: [ 10.10.1.1:309, 10.10.2.2:309 ]
type EndpointSubset struct {
// IP addresses which offer the related ports that are marked as ready. These endpoints
// should be considered safe for load balancers and clients to utilize.
// +optional
// In short: the ready addresses serving the ports listed in Ports.
Addresses []EndpointAddress `json:"addresses,omitempty" protobuf:"bytes,1,rep,name=addresses"`
// IP addresses which offer the related ports but are not currently marked as ready
// because they have not yet finished starting, have recently failed a readiness check,
// or have recently failed a liveness check.
// +optional
// In short: the addresses that are not ready yet.
NotReadyAddresses []EndpointAddress `json:"notReadyAddresses,omitempty" protobuf:"bytes,2,rep,name=notReadyAddresses"`
// Port numbers available on the related IP addresses.
// +optional
// Each entry is an EndpointPort (declared outside this excerpt); the example
// in the type comment shows a name and a port number per entry.
Ports []EndpointPort `json:"ports,omitempty" protobuf:"bytes,3,rep,name=ports"`
}

Endpoints初始化

Endpoints控制器监听Service和Pod的变化,并根据Pod的就绪状态把对应地址写入Addresses或NotReadyAddresses:

// addEndpointSubset appends one new EndpointSubset, holding the single address
// epa and (when epp is non-nil) the single port *epp, to subsets.
//
// The address is stored under Addresses when tolerateUnreadyEndpoints is true
// or the pod reports ready; otherwise it is stored under NotReadyAddresses and
// a V(5) log line is emitted. According to the original header comment,
// tolerateUnreadyEndpoints reflects the Service's PublishNotReadyAddresses
// field, and the pod must already have been filtered through
// ShouldPodBeInEndpoints.
//
// It returns the extended slice followed by the number of ready and not-ready
// endpoints added by this call (exactly one of the two counters is 1).
func addEndpointSubset(logger klog.Logger, subsets []v1.EndpointSubset, pod *v1.Pod, epa v1.EndpointAddress,
	epp *v1.EndpointPort, tolerateUnreadyEndpoints bool) ([]v1.EndpointSubset, int, int) {
	// Kept as a non-nil empty slice when no port is supplied, as before.
	ports := []v1.EndpointPort{}
	if epp != nil {
		ports = append(ports, *epp)
	}
	addrs := []v1.EndpointAddress{epa}

	// Readiness is only consulted when unready endpoints are not tolerated.
	if !tolerateUnreadyEndpoints && !podutil.IsPodReady(pod) {
		logger.V(5).Info("Pod is out of service", "pod", klog.KObj(pod))
		notReady := v1.EndpointSubset{NotReadyAddresses: addrs, Ports: ports}
		return append(subsets, notReady), 0, 1
	}

	ready := v1.EndpointSubset{Addresses: addrs, Ports: ports}
	return append(subsets, ready), 1, 0
}

追加注解和标签

在生成过程中,会自动追加标签和注解(Annotation)

追加的注解

注解key 注解value 说明
endpoints.kubernetes.io/over-capacity truncated 当Endpoints超过1000个端点,就会截断
endpoints.kubernetes.io/last-change-trigger-time 时间戳 表示某些Pod或者Service最后一次触发Endpoints对象变更的时间戳

追加的标签

标签key 标签value 说明
service.kubernetes.io/headless “” Headless Service对应的Endpoints

什么是EndpointSlice

原来的Endpoints API提供了在K8s中跟踪网络端点的一种简单而直接的方法,但是随着集群和Service后端Pod数量的增长,单个Endpoints对象需要处理大量网络端点,其局限性带来了挑战。

EndpointSlice包含了一组网络端点的引用

  • 默认情况下,管理端点不超过100个
  • 通过kube-controller-manager的--max-endpoints-per-slice参数可以修改该数量,最大值为1000

EndpointSlice字段说明

// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

// EndpointSlice represents a subset of the endpoints that implement a service.
// For a given service there may be multiple EndpointSlice objects, selected by
// labels, which must be joined to produce the full set of endpoints.
type EndpointSlice struct {
metav1.TypeMeta
// Standard object's metadata.
// +optional
metav1.ObjectMeta
// addressType specifies the type of address carried by this EndpointSlice.
// All addresses in this slice must be the same type. This field is
// immutable after creation. The following address types are currently
// supported:
// * IPv4: Represents an IPv4 Address.
// * IPv6: Represents an IPv6 Address.
// * FQDN: Represents a Fully Qualified Domain Name. [DEPRECATED]
// In short: one address type per slice (IPv4, IPv6 or FQDN), fixed at
// creation time.
AddressType AddressType
// endpoints is a list of unique endpoints in this slice. Each slice may
// include a maximum of 1000 endpoints.
// +listType=atomic
// In short: the unique endpoints that belong to this slice.
Endpoints []Endpoint
// ports specifies the list of network ports exposed by each endpoint in
// this slice. Each port must have a unique name. When ports is empty, it
// indicates that there are no defined ports. When a port is defined with a
// nil port value, it indicates "all ports". Each slice may include a
// maximum of 100 ports.
// +optional
// +listType=atomic
// In short: the ports exposed by every endpoint in this slice; a nil port
// value means all ports.
Ports []EndpointPort
}

// Endpoint represents a single logical "backend" implementing a service.
type Endpoint struct {
// addresses of this endpoint. The contents of this field are interpreted
// according to the corresponding EndpointSlice addressType field. Consumers
// must handle different types of addresses in the context of their own
// capabilities. This must contain at least one address but no more than
// 100.
// +listType=set
// In short: the addresses of this endpoint.
Addresses []string
// conditions contains information about the current status of the endpoint.
// In short: the endpoint's current state.
Conditions EndpointConditions
// hostname of this endpoint. This field may be used by consumers of
// endpoints to distinguish endpoints from each other (e.g. in DNS names).
// Multiple endpoints which use the same hostname should be considered
// fungible (e.g. multiple A values in DNS). Must pass DNS Label (RFC 1123)
// validation.
// +optional
// In short: the endpoint's host name.
Hostname *string
// targetRef is a reference to a Kubernetes object that represents this
// endpoint.
// +optional
// In short: the object backing this endpoint (podToEndpoint in this file
// fills it with a reference of Kind "Pod").
TargetRef *api.ObjectReference
// deprecatedTopology is deprecated and only retained for round-trip
// compatibility with v1beta1 Topology field. When v1beta1 is removed, this
// should be removed, too.
// +optional
// Deprecated; do not rely on it.
DeprecatedTopology map[string]string
// nodeName represents the name of the Node hosting this endpoint. This can
// be used to determine endpoints local to a Node.
// +optional
// In short: the node the endpoint runs on.
NodeName *string
// zone is the name of the Zone this endpoint exists in.
// +optional
// This is a zone in the topology sense, not a time zone: podToEndpoint in
// this file fills it from the node's v1.LabelTopologyZone label.
Zone *string
// hints contains information associated with how an endpoint should be
// consumed.
// +featureGate=TopologyAwareHints
// +optional
// In short: hints used to implement topology aware hints.
Hints *EndpointHints
}

拓扑感知提示

拓扑感知提示(Topology Aware Hints,也称拓扑感知路由)提供了一种让流量尽量保留在其发起区域(zone)之内的机制。下面的podToEndpoint在构造端点时,会根据节点的拓扑区域标签填充Zone字段:

// podToEndpoint builds the discovery.Endpoint that represents pod as a backend
// of service for the given addressType.
//
// The endpoint always carries the pod's addresses, a Ready condition and a
// TargetRef pointing at the pod. Serving/Terminating conditions are added only
// when the EndpointSliceTerminatingCondition feature gate is enabled. NodeName,
// Zone and Hostname are filled in only when the corresponding information is
// available (node may be nil).
func podToEndpoint(pod *v1.Pod, node *v1.Node, service *v1.Service, addressType discovery.AddressType) discovery.Endpoint {
	isServing := podutil.IsPodReady(pod)
	isTerminating := pod.DeletionTimestamp != nil

	// For compatibility reasons a terminating pod is never reported as ready,
	// unless the service explicitly publishes not-ready addresses.
	isReady := service.Spec.PublishNotReadyAddresses || (isServing && !isTerminating)

	var endpoint discovery.Endpoint
	endpoint.Addresses = getEndpointAddresses(pod.Status, service, addressType)
	endpoint.Conditions.Ready = &isReady
	endpoint.TargetRef = &v1.ObjectReference{
		Kind:      "Pod",
		Namespace: pod.Namespace,
		Name:      pod.Name,
		UID:       pod.UID,
	}

	if utilfeature.DefaultFeatureGate.Enabled(features.EndpointSliceTerminatingCondition) {
		endpoint.Conditions.Serving = &isServing
		endpoint.Conditions.Terminating = &isTerminating
	}

	if pod.Spec.NodeName != "" {
		endpoint.NodeName = &pod.Spec.NodeName
	}

	// The zone comes from the node's topology label; skip it when the node is
	// unknown or unlabeled.
	if node != nil {
		if zoneName := node.Labels[v1.LabelTopologyZone]; zoneName != "" {
			endpoint.Zone = &zoneName
		}
	}

	if endpointutil.ShouldSetHostname(pod, service) {
		endpoint.Hostname = &pod.Spec.Hostname
	}

	return endpoint
}

复制功能

控制平面会把Endpoints资源复制为EndpointSlice资源

  • 从K8s1.19默认开启

但是在以下情况不会复制:

  1. Endpoints资源设置了Label:endpointslice.kubernetes.io/skip-mirror=true
  2. Endpoints资源设置了Annotation:control-plane.alpha.kubernetes.io/leader
  3. Endpoints资源对应的Service不存在
  4. Endpoints资源对应的Service资源设置了非空的Selector
  • 一个Endpoints资源包含多个子集,或者同时包含IPv4和IPv6地址的时候,会被复制为多个EndpointSlice资源,每个子集最多复制1000个地址

数据分布管理机制

控制平面会尽可能把EndpointSlice填满,但是不会在多个EndpointSlice数据不均衡的情况下主动执行重新平衡(Rebalance)操作,步骤如下:

  1. 遍历所有EndpointSlice资源,删除其中不需要的Endpoints,更新匹配的Endpoints
  2. 遍历上一步中更新过的EndpointSlice资源,把需要新增的Endpoints填充进去
  3. 如果还有待添加的Endpoints,尝试将其添加到之前未更新的EndpointSlice资源中,或者创建新的EndpointSlice资源并添加进去(优先考虑创建新资源,以减少对已有EndpointSlice的更新次数)

追加的注解和标签

追加的注解

注解key 注解value 说明
endpoints.kubernetes.io/last-change-trigger-time 时间戳 表示某些Pod或Service最后一次触发EndpointSlice更新的时间

追加的标签

标签key 标签value 说明
kubernetes.io/service-name Service Name 指定属于的Service
endpointslice.kubernetes.io/managed-by endpointslice-controller.k8s.io 固定标签,表明该对象被EndpointSlice控制器管理