From 955a96ec8bc6a2b957c8f4a6bea6bc5c232792a0 Mon Sep 17 00:00:00 2001 From: wangjianyu Date: Tue, 9 Jan 2024 14:06:44 +0800 Subject: [PATCH] release: 1.4 doc Signed-off-by: wangjianyu --- README-zh.md | 4 +- README.md | 3 + blog/2022-05-31-release/index.md | 2 +- blog/2022-09-23-release/index.md | 12 +- blog/2024-01-15-release/index.md | 298 ++++++++ blog/authors.yml | 6 + docs/installation.md | 12 +- .../2022-03-31-release/index.md | 152 +++++ .../2022-04-19-release/index.md | 98 +++ .../2022-05-07-release/index.md | 256 +++++++ .../2022-05-31-release/index.md | 207 ++++++ .../2022-06-30-release/index.md | 220 ++++++ .../2022-08-04-release/index.md | 166 +++++ .../2022-09-23-release/index.md | 373 ++++++++++ .../2022-11-03-release/index.md | 78 +++ .../2023-01-03-release/index.md | 158 +++++ .../2023-04-07-release/index.md | 192 ++++++ .../2023-08-16-release/index.md | 184 +++++ .../2024-01-15-release/index.md | 301 ++++++++ .../anolis-CPU-Co-location/index.md | 65 ++ .../authors.yml | 47 ++ .../current/installation.md | 10 +- .../version-v1.4.json | 22 + .../version-v1.4/architecture/overview.md | 60 ++ .../version-v1.4/architecture/priority.md | 88 +++ .../version-v1.4/architecture/qos.md | 35 + .../architecture/resource-model.md | 37 + .../best-practices/anolis_plugsched.md | 37 + .../colocation-of-spark-jobs.md | 101 +++ .../fine-grained-cpu-orchestration.md | 259 +++++++ .../designs/descheduler-framework.md | 84 +++ .../designs/enhanced-scheduler-extension.md | 232 +++++++ .../designs/fine-grained-cpu-orchestration.md | 451 ++++++++++++ .../designs/fine-grained-device-scheduling.md | 408 +++++++++++ .../version-v1.4/designs/gang-scheduling.md | 347 ++++++++++ .../version-v1.4/designs/koordlet-overview.md | 42 ++ .../designs/load-aware-scheduling.md | 114 ++++ ...ulti-hierarchy-elastic-quota-management.md | 342 ++++++++++ .../version-v1.4/designs/node-prediction.md | 278 ++++++++ .../designs/nri-mode-resource-management.md | 152 +++++ .../version-v1.4/designs/pod-migration-job.md | 374 ++++++++++ .../designs/resource-reservation.md | 245 +++++++ .../version-v1.4/designs/runtime-proxy.md | 136 ++++ .../version-v1.4/installation.md | 231 +++++++ .../version-v1.4/introduction.md | 48 ++ .../user-manuals/colocation-profile.md | 136 ++++ .../version-v1.4/user-manuals/cpu-burst.md | 197 ++++++ .../version-v1.4/user-manuals/cpu-evict.md | 137 ++++ .../version-v1.4/user-manuals/cpu-qos.md | 189 +++++ .../version-v1.4/user-manuals/cpu-suppress.md | 103 +++ .../fine-grained-cpu-orchestration.md | 251 +++++++ .../fine-grained-device-scheduling.md | 318 +++++++++ .../user-manuals/gang-scheduling.md | 364 ++++++++++ .../user-manuals/load-aware-descheduling.md | 229 +++++++ .../user-manuals/load-aware-scheduling.md | 311 +++++++++ .../version-v1.4/user-manuals/memory-evict.md | 122 ++++ .../version-v1.4/user-manuals/memory-qos.md | 355 ++++++++++ ...ulti-hierarchy-elastic-quota-management.md | 621 +++++++++++++++++ .../user-manuals/performance-collector.md | 188 +++++ .../user-manuals/pod-migration-job.md | 254 +++++++ .../user-manuals/resource-reservation.md | 443 ++++++++++++ .../version-v1.4/user-manuals/slo-config.md | 406 +++++++++++ static/img/cpu-normalization.svg | 3 + static/img/multiquotatree.png | Bin 0 -> 480735 bytes .../version-v1.4/architecture/overview.md | 58 ++ .../version-v1.4/architecture/priority.md | 87 +++ .../version-v1.4/architecture/qos.md | 36 + .../architecture/resource-model.md | 34 + .../best-practices/anolis_plugsched.md | 36 + .../colocation-of-hadoop-yarn.md | 230 
+++++++ .../colocation-of-spark-jobs.md | 101 +++ .../fine-grained-cpu-orchestration.md | 259 +++++++ .../designs/descheduler-framework.md | 84 +++ .../designs/enhanced-scheduler-extension.md | 232 +++++++ .../designs/fine-grained-cpu-orchestration.md | 452 ++++++++++++ .../designs/fine-grained-device-scheduling.md | 408 +++++++++++ .../version-v1.4/designs/gang-scheduling.md | 385 +++++++++++ .../version-v1.4/designs/koordinator-yarn.md | 76 +++ .../version-v1.4/designs/koordlet-overview.md | 56 ++ .../designs/load-aware-scheduling.md | 115 ++++ ...ulti-hierarchy-elastic-quota-management.md | 342 ++++++++++ .../version-v1.4/designs/node-prediction.md | 278 ++++++++ .../designs/nri-mode-resource-management.md | 152 +++++ .../version-v1.4/designs/pod-migration-job.md | 374 ++++++++++ .../designs/resource-reservation.md | 245 +++++++ .../version-v1.4/designs/runtime-proxy.md | 153 +++++ versioned_docs/version-v1.4/installation.md | 170 +++++ versioned_docs/version-v1.4/introduction.md | 48 ++ .../user-manuals/capacity-scheduling.md | 645 ++++++++++++++++++ .../user-manuals/colocation-profile.md | 137 ++++ .../version-v1.4/user-manuals/cpu-burst.md | 197 ++++++ .../version-v1.4/user-manuals/cpu-evict.md | 144 ++++ .../version-v1.4/user-manuals/cpu-qos.md | 183 +++++ .../version-v1.4/user-manuals/cpu-suppress.md | 102 +++ .../fine-grained-cpu-orchestration.md | 262 +++++++ .../fine-grained-device-scheduling.md | 327 +++++++++ .../user-manuals/gang-scheduling.md | 364 ++++++++++ .../user-manuals/host-application-qos.md | 131 ++++ .../installation-runtime-proxy.md | 78 +++ .../user-manuals/load-aware-descheduling.md | 229 +++++++ .../user-manuals/load-aware-scheduling.md | 324 +++++++++ .../version-v1.4/user-manuals/memory-evict.md | 132 ++++ .../version-v1.4/user-manuals/memory-qos.md | 355 ++++++++++ .../user-manuals/performance-collector.md | 184 +++++ .../user-manuals/pod-migration-job.md | 256 +++++++ .../user-manuals/resource-reservation.md | 449 ++++++++++++ .../version-v1.4/user-manuals/slo-config.md | 432 ++++++++++++ versioned_sidebars/version-v1.4-sidebars.json | 93 +++ versions.json | 1 + 109 files changed, 21011 insertions(+), 19 deletions(-) create mode 100644 blog/2024-01-15-release/index.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-blog/2022-03-31-release/index.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-blog/2022-04-19-release/index.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-blog/2022-05-07-release/index.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-blog/2022-05-31-release/index.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-blog/2022-06-30-release/index.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-blog/2022-08-04-release/index.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-blog/2022-09-23-release/index.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-blog/2022-11-03-release/index.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-blog/2023-01-03-release/index.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-blog/2023-04-07-release/index.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-blog/2023-08-16-release/index.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-blog/2024-01-15-release/index.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-blog/anolis-CPU-Co-location/index.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-blog/authors.yml create mode 100644 
i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4.json create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/architecture/overview.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/architecture/priority.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/architecture/qos.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/architecture/resource-model.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/best-practices/anolis_plugsched.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/best-practices/colocation-of-spark-jobs.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/best-practices/fine-grained-cpu-orchestration.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/descheduler-framework.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/enhanced-scheduler-extension.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/fine-grained-cpu-orchestration.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/fine-grained-device-scheduling.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/gang-scheduling.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/koordlet-overview.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/load-aware-scheduling.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/multi-hierarchy-elastic-quota-management.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/node-prediction.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/nri-mode-resource-management.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/pod-migration-job.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/resource-reservation.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/runtime-proxy.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/installation.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/introduction.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/colocation-profile.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/cpu-burst.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/cpu-evict.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/cpu-qos.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/cpu-suppress.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/fine-grained-cpu-orchestration.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/fine-grained-device-scheduling.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/gang-scheduling.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/load-aware-descheduling.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/load-aware-scheduling.md 
create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/memory-evict.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/memory-qos.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/multi-hierarchy-elastic-quota-management.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/performance-collector.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/pod-migration-job.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/resource-reservation.md create mode 100644 i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/slo-config.md create mode 100644 static/img/cpu-normalization.svg create mode 100644 static/img/multiquotatree.png create mode 100644 versioned_docs/version-v1.4/architecture/overview.md create mode 100644 versioned_docs/version-v1.4/architecture/priority.md create mode 100644 versioned_docs/version-v1.4/architecture/qos.md create mode 100644 versioned_docs/version-v1.4/architecture/resource-model.md create mode 100644 versioned_docs/version-v1.4/best-practices/anolis_plugsched.md create mode 100644 versioned_docs/version-v1.4/best-practices/colocation-of-hadoop-yarn.md create mode 100644 versioned_docs/version-v1.4/best-practices/colocation-of-spark-jobs.md create mode 100644 versioned_docs/version-v1.4/best-practices/fine-grained-cpu-orchestration.md create mode 100644 versioned_docs/version-v1.4/designs/descheduler-framework.md create mode 100644 versioned_docs/version-v1.4/designs/enhanced-scheduler-extension.md create mode 100644 versioned_docs/version-v1.4/designs/fine-grained-cpu-orchestration.md create mode 100644 versioned_docs/version-v1.4/designs/fine-grained-device-scheduling.md create mode 100644 versioned_docs/version-v1.4/designs/gang-scheduling.md create mode 100644 versioned_docs/version-v1.4/designs/koordinator-yarn.md create mode 100644 versioned_docs/version-v1.4/designs/koordlet-overview.md create mode 100644 versioned_docs/version-v1.4/designs/load-aware-scheduling.md create mode 100644 versioned_docs/version-v1.4/designs/multi-hierarchy-elastic-quota-management.md create mode 100644 versioned_docs/version-v1.4/designs/node-prediction.md create mode 100644 versioned_docs/version-v1.4/designs/nri-mode-resource-management.md create mode 100644 versioned_docs/version-v1.4/designs/pod-migration-job.md create mode 100644 versioned_docs/version-v1.4/designs/resource-reservation.md create mode 100644 versioned_docs/version-v1.4/designs/runtime-proxy.md create mode 100644 versioned_docs/version-v1.4/installation.md create mode 100644 versioned_docs/version-v1.4/introduction.md create mode 100644 versioned_docs/version-v1.4/user-manuals/capacity-scheduling.md create mode 100644 versioned_docs/version-v1.4/user-manuals/colocation-profile.md create mode 100644 versioned_docs/version-v1.4/user-manuals/cpu-burst.md create mode 100644 versioned_docs/version-v1.4/user-manuals/cpu-evict.md create mode 100644 versioned_docs/version-v1.4/user-manuals/cpu-qos.md create mode 100644 versioned_docs/version-v1.4/user-manuals/cpu-suppress.md create mode 100644 versioned_docs/version-v1.4/user-manuals/fine-grained-cpu-orchestration.md create mode 100644 versioned_docs/version-v1.4/user-manuals/fine-grained-device-scheduling.md create mode 100644 versioned_docs/version-v1.4/user-manuals/gang-scheduling.md create mode 100644 
versioned_docs/version-v1.4/user-manuals/host-application-qos.md create mode 100644 versioned_docs/version-v1.4/user-manuals/installation-runtime-proxy.md create mode 100644 versioned_docs/version-v1.4/user-manuals/load-aware-descheduling.md create mode 100644 versioned_docs/version-v1.4/user-manuals/load-aware-scheduling.md create mode 100644 versioned_docs/version-v1.4/user-manuals/memory-evict.md create mode 100644 versioned_docs/version-v1.4/user-manuals/memory-qos.md create mode 100644 versioned_docs/version-v1.4/user-manuals/performance-collector.md create mode 100644 versioned_docs/version-v1.4/user-manuals/pod-migration-job.md create mode 100644 versioned_docs/version-v1.4/user-manuals/resource-reservation.md create mode 100644 versioned_docs/version-v1.4/user-manuals/slo-config.md create mode 100644 versioned_sidebars/version-v1.4-sidebars.json diff --git a/README-zh.md b/README-zh.md index 7e0ef69aa..d3c290a0f 100644 --- a/README-zh.md +++ b/README-zh.md @@ -14,7 +14,9 @@ - 如果你添加的是当前稳定版本包含的功能文档(例如 v1.0),请添加: - 英文文档到 `docs/` 和 `versioned_docs/version-v1.0` 两个地方 - 中文文档到 `i18n/zh-Hans/docusaurus-plugin-content-docs/current` 和 `i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.0` 两个地方 - +- 如果你要发布博客,请添加: + - 英文文档到 `blog/` + - 中文文档到 `i18n/zh-Hans/docusaurus-plugin-content-blog/` 您可以使用以下命令在本地调试文档: ``` diff --git a/README.md b/README.md index ce1477423..17940651f 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,9 @@ This is **IMPORTANT** to know: - If you are adding docs for features that have already supported in stable release (e.g., v1.0), please add: - EN docs into both `docs/` and `versioned_docs/version-v1.0` - ZH docs into both `i18n/zh-Hans/docusaurus-plugin-content-docs/current` and `i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.0` +- If you are publishing blog, please add: + - EN docs into `blog/` + - ZH docs into `i18n/zh-Hans/docusaurus-plugin-content-blog/` You can debug the docs locally with the following command: diff --git a/blog/2022-05-31-release/index.md b/blog/2022-05-31-release/index.md index 6654e56ba..b08516909 100644 --- a/blog/2022-05-31-release/index.md +++ b/blog/2022-05-31-release/index.md @@ -161,7 +161,7 @@ To enable this feature, you need to update the kernel and configuration file, th There are two components involved, koord-runtime-proxy and RuntimePlugins. -![image](../../static/img/koord-runtime-proxy-architecture.svg) +![image](/img/koord-runtime-proxy-architecture.svg) ### koord-runtime-proxy koord-runtime-proxy is in charge of intercepting request during pod's lifecycle, such as RunPodSandbox, CreateContainer etc., and then calling RuntimePlugins to do resource isolation policies before transferring request to backend containerd(dockerd) and after transferring response to kubelet. koord-runtime-proxy provides an isolation-policy-execution framework which allows customized plugins registered to do specified isolation policies, these plugins are called RuntimePlugins. koord-runtime-proxy itself does NOT do any isolation policies. 
diff --git a/blog/2022-09-23-release/index.md b/blog/2022-09-23-release/index.md index 05e86e97c..81fac9c8e 100644 --- a/blog/2022-09-23-release/index.md +++ b/blog/2022-09-23-release/index.md @@ -21,7 +21,7 @@ Gang scheduling是在并发系统中将多个相关联的进程调度到不同 #### 改进 PodGroup 调度失败的处理机制,实现更高效的重试调度 -举个例子,PodGroup A 关联了5个Pod,其中前3个Pod通过Filter/Score,进入Wait阶段,第4个Pod调度失败,当调度第5个Pod时,发现第4个Pod已经失败,则拒绝调度。在社区 Coscheduling 实现中,调度失败的PodGroup 会加入到基于cache机制的 lastDeniedPG 对象中,当 cache 没有过期,则会拒绝调度;如果过期就允许继续调度。可以看到 cache 的过期时间很关键,过期时间设置的过长会导致Pod迟迟得不到调度机会,设置的过短会出现频繁的无效调度。
而在Enhanced Coscheduling 中,实现了一种基于 ScheduleCycle 的重试机制。以上场景为例,5个Pod的 ScheduleCycle 初始值为 0,PodGroup 对应的 ScheduleCycle 初始值为1;当每一次尝试调度 Pod 时,都会更新 Pod ScheduleCycle 为 PodGroup ScheduleCycle。如果其中一个 Pod 调度失败,会标记当前的 PodGroup ScheduleCycle 无效,之后所有小于 PodGroup ScheduleCycle 的 Pod 都会被拒绝调度。当同一个 PodGroup 下的所有 Pod 都尝试调度一轮后,Pod ScheduleCycle 都更新为当前 PodGroup ScheduleCycle,并递进 PodGroup ScheduleCycle,并标记允许调度。这种方式可以有效规避基于过期时间的缺陷,完全取决于调度队列的配置重试调度。
![image.png](../../static/img/gang-schedulue-cycle.png "基于 ScheduleCycle 的重试机制") +举个例子,PodGroup A 关联了5个Pod,其中前3个Pod通过Filter/Score,进入Wait阶段,第4个Pod调度失败,当调度第5个Pod时,发现第4个Pod已经失败,则拒绝调度。在社区 Coscheduling 实现中,调度失败的PodGroup 会加入到基于cache机制的 lastDeniedPG 对象中,当 cache 没有过期,则会拒绝调度;如果过期就允许继续调度。可以看到 cache 的过期时间很关键,过期时间设置的过长会导致Pod迟迟得不到调度机会,设置的过短会出现频繁的无效调度。
而在Enhanced Coscheduling 中,实现了一种基于 ScheduleCycle 的重试机制。以上场景为例,5个Pod的 ScheduleCycle 初始值为 0,PodGroup 对应的 ScheduleCycle 初始值为1;当每一次尝试调度 Pod 时,都会更新 Pod ScheduleCycle 为 PodGroup ScheduleCycle。如果其中一个 Pod 调度失败,会标记当前的 PodGroup ScheduleCycle 无效,之后所有小于 PodGroup ScheduleCycle 的 Pod 都会被拒绝调度。当同一个 PodGroup 下的所有 Pod 都尝试调度一轮后,Pod ScheduleCycle 都更新为当前 PodGroup ScheduleCycle,并递进 PodGroup ScheduleCycle,并标记允许调度。这种方式可以有效规避基于过期时间的缺陷,完全取决于调度队列的配置重试调度。
![image.png](/img/gang-schedulue-cycle.png "基于 ScheduleCycle 的重试机制") #### 支持多个 PodGroup 为一组完成 Gang Scheduling @@ -98,7 +98,7 @@ spec: ``` #### 树形结构管理机制和使用方法 -需要使用树形结构管理 Quota 时,需要在 ElasticQuota 中追加 Label `quota.scheduling.koordinator.sh/is-parent`表示当前 ElasticQuota 是否是父节点,`quota.scheduling.koordinator.sh/parent`表示当前 ElasticQuota 的父节点 ElasticQuota 的名字。举个例子:
![image.png](../../static/img/quota-tree.png)
我们创建一个 ElasticQuota Root 作为根节点,资源总量为CPU 100C,内存200Gi,以及子节点 quota-a +需要使用树形结构管理 Quota 时,需要在 ElasticQuota 中追加 Label `quota.scheduling.koordinator.sh/is-parent`表示当前 ElasticQuota 是否是父节点,`quota.scheduling.koordinator.sh/parent`表示当前 ElasticQuota 的父节点 ElasticQuota 的名字。举个例子:
![image.png](/img/quota-tree.png)
我们创建一个 ElasticQuota Root 作为根节点,资源总量为CPU 100C,内存200Gi,以及子节点 quota-a ```yaml apiVersion: scheduling.sigs.k8s.io/v1alpha1 kind: ElasticQuota @@ -152,13 +152,13 @@ spec: - runtime 表示 ElasticQuota 当前可以使用的实际资源量。如果 request 小于 min,runtime 等于 request。这也意味着,需要遵循 min 语义,应无条件满足 request。如果 request 大于 min,且 min 小于 max,公平性保障机制会分配 runtime 在min 与 max 之前,即 max >= runtime >= min。 - shared-weight 表示一个 ElasticQuota 的竞争力,默认等于 ElasticQuota Max。 -通过几个例子为大家介绍公平性保障机制的运行过程,假设当前集群的 CPU 总量为100C,并且有4个ElasticQuota,如下图所示,绿色部分为 Request 量:A 当前的request 为5,B当前的request为20,C当前的Request为30,D当前的Request为70。
![image.png](../../static/img/quota-init-example.png)
并且我们注意到, A, B, C, D 的 min 之和是60,剩下 40 个空闲额度, 同时 A 还可以借给 B, C, D 5个额度,所以一共有45个额度被B,C,D共享,根据各个ElasticQuota的 shared-weight,B,C,D分别对应60,50和80,计算出各自可以共享的量: +通过几个例子为大家介绍公平性保障机制的运行过程,假设当前集群的 CPU 总量为100C,并且有4个ElasticQuota,如下图所示,绿色部分为 Request 量:A 当前的request 为5,B当前的request为20,C当前的Request为30,D当前的Request为70。
![image.png](/img/quota-init-example.png)
并且我们注意到, A, B, C, D 的 min 之和是60,剩下 40 个空闲额度, 同时 A 还可以借给 B, C, D 5个额度,所以一共有45个额度被B,C,D共享,根据各个ElasticQuota的 shared-weight,B,C,D分别对应60,50和80,计算出各自可以共享的量: - B 可以获取 14个额度, 45 * 60 / (60 + 50 + 80) = 14 - C 可以获取 12个额度, 45 * 50 / (60 + 50 + 80) = 12 - D 可以获取 19个额度, 45 * 80 / (60 + 50 + 80) = 19 -![image.png](../../static/img/quota-init-runtime-example.png)
但我们也要注意的是,C和D需要更多额度,而 B只需要5个额度就能满足 Request,并且 B 的min是15,也就意味着我们只需要给 B 5个额度,剩余的9个额度继续分给C和D。 +![image.png](/img/quota-init-runtime-example.png)
但我们也要注意的是,C和D需要更多额度,而 B只需要5个额度就能满足 Request,并且 B 的min是15,也就意味着我们只需要给 B 5个额度,剩余的9个额度继续分给C和D。 - C 可以获取 3个额度, 9 * 50 / (50 + 80) = 3 - D 可以获取 6个额度, 9 * 80 / (50 + 80) = 6 @@ -187,7 +187,7 @@ Koordinator ElasticQuota 机制在调度阶段如果发现 Quota 不足,会进 ### Device Share Scheduling -机器学习领域里依靠大量强大算力性能的 GPU 设备完成模型训练,但是 GPU 自身价格十分昂贵。如何更好地利用GPU设备,发挥GPU的价值,降低成本,是一个亟待解决的问题。 Kubernetes 社区现有的 GPU 分配机制中,GPU 是由 kubelet 分配的,并只支持分配一个或多个完整的 GPU 实例。 这种方法简单可靠,但类似于 CPU 和 Memory,GPU 并不是一直处于高利用率水位,同样存在资源浪费的问题。 因此,Koordinator 希望支持多工作负载共享使用 GPU 设备以节省成本。 此外,GPU 有其特殊性。 比如下面的 NVIDIA GPU 支持的 NVLink 和超卖场景,都需要通过调度器进行中央决策,以获得全局最优的分配结果。
![image.png](../../static/img/nvlink.png) +机器学习领域里依靠大量强大算力性能的 GPU 设备完成模型训练,但是 GPU 自身价格十分昂贵。如何更好地利用GPU设备,发挥GPU的价值,降低成本,是一个亟待解决的问题。 Kubernetes 社区现有的 GPU 分配机制中,GPU 是由 kubelet 分配的,并只支持分配一个或多个完整的 GPU 实例。 这种方法简单可靠,但类似于 CPU 和 Memory,GPU 并不是一直处于高利用率水位,同样存在资源浪费的问题。 因此,Koordinator 希望支持多工作负载共享使用 GPU 设备以节省成本。 此外,GPU 有其特殊性。 比如下面的 NVIDIA GPU 支持的 NVLink 和超卖场景,都需要通过调度器进行中央决策,以获得全局最优的分配结果。
![image.png](/img/nvlink.png) 从图中我们可以发现,虽然该节点有8个 GPU 实例,型号为A100/V100,但 GPU 实例之间的数据传输速度是不同的。 当一个 Pod 需要多个 GPU 实例时,我们可以为 Pod 分配具有最大数据传输速度组合关系的 GPU 实例。 此外,当我们希望一组 Pod 中的 GPU 实例具有最大数据传输速度组合关系时,调度器应该将最佳 GPU 实例批量分配给这些 Pod,并将它们分配到同一个节点。 @@ -261,7 +261,7 @@ Kuberetes 社区原生提供的设备调度机制中,调度器只负责校验 #### 单机侧精准绑定设备信息 -Kubernetes 社区在 kubelet 中提供了 DevicePlugin 机制,支持设备厂商在 kubelet 分配好设备后有机会获得设备信息,并填充到环境变量或者更新挂载路径。但是不能支持 中心化的 GPU 精细化调度场景。
针对这个问题, Koordinator 扩展了 koord-runtime-proxy ,支持在 kubelet 创建容器时更新环境变量,注入调度器分配的 GPU 设备信息。
![](../../static/img/koordlet-inject-env.jpeg) +Kubernetes 社区在 kubelet 中提供了 DevicePlugin 机制,支持设备厂商在 kubelet 分配好设备后有机会获得设备信息,并填充到环境变量或者更新挂载路径。但是不能支持 中心化的 GPU 精细化调度场景。
针对这个问题, Koordinator 扩展了 koord-runtime-proxy ,支持在 kubelet 创建容器时更新环境变量,注入调度器分配的 GPU 设备信息。
![](/img/koordlet-inject-env.jpeg) ## 3. 调度器诊断分析 diff --git a/blog/2024-01-15-release/index.md b/blog/2024-01-15-release/index.md new file mode 100644 index 000000000..dc47c5667 --- /dev/null +++ b/blog/2024-01-15-release/index.md @@ -0,0 +1,298 @@ +--- +slug: release-v1.4.0 +title: "Koordinator v1.4: more types of computing workloads and more flexible resource management mechanisms" +authors: [ZiMengSheng] +tags: [release] +--- + +## Background + +As an actively developing open source project, Koordinator has undergone multiple version iterations since the release of v0.1.0 in April 2022, continuously bringing innovations and enhancements to the Kubernetes ecosystem. The core objective of the project is to provide comprehensive solutions for orchestrating collocated workloads, scheduling resources, ensuring resource isolation, and tuning performance to help users optimize container performance and improve cluster resource utilization. + +In past version iterations, the Koordinator community has continued to grow, receiving active participation and contributions from engineers at well-known companies. These include Alibaba, Ant Technology Group, Intel, Xiaomi, Xiaohongshu, iQIYI, Qihoo 360, Youzan, Quwan, Meiya Pico, PITS, and others. Each version has advanced through the collective efforts of the community, demonstrating the project's capability to address challenges in actual production environments. + +Today, we are pleased to announce that Koordinator v1.4.0 is officially released. This version introduces several new features, including Kubernetes and YARN workload co-location, a NUMA topology alignment strategy, CPU normalization, and cold memory reporting. It also enhances features in key areas such as elastic quota management, QoS management for non-containerized applications on hosts, and descheduling protection strategies. These innovations and improvements aim to better support enterprise-level Kubernetes cluster environments, particularly in complex and diverse application scenarios. + +The release of version v1.4.0 will bring users support for more types of computing workloads and more flexible resource management mechanisms. We look forward to these improvements helping users to address a broader range of enterprise resource management challenges. In the v1.4.0 release, a total of 11 new developers have joined the development of the Koordinator community. They are @shaloulcy, @baowj-678, @zqzten, @tan90github, @pheianox, @zxh326, @qinfustu, @ikaven1024, @peiqiaoWang, @bogo-y, and @xujihui1985. We thank all community members for their active participation and contributions during this period and for their ongoing commitment to the community. + +## Interpretation of Version Features + +### 1. Support Kubernetes and YARN workload co-location + +Koordinator already supports the co-location of online and offline workloads within the Kubernetes ecosystem. However, outside the Kubernetes ecosystem, a considerable number of big data workloads still run on traditional Hadoop YARN. + +In response, the Koordinator community, together with developers from Alibaba Cloud, Xiaohongshu, and Ant Financial, has jointly launched the Hadoop YARN and Kubernetes co-location project, Koordinator YARN Copilot. This initiative enables the running of Hadoop NodeManager within Kubernetes clusters, fully leveraging the technical value of peak-shaving and resource reuse for different types of workloads.
Koordinator YARN Copilot has the following features: + +- Embrace the open-source ecosystem: Built upon the open-source version of Hadoop YARN without any intrusive modifications to YARN. +- Unified resource priority and QoS policy: YARN NodeManager utilizes Koordinator’s Batch priority resources and adheres to Koordinator's QoS management policies. +- Node-level resource sharing: The co-location resources provided by Koordinator can be used by both Kubernetes pods and YARN tasks. Different types of offline applications can run on the same node. + +![img](/img/hadoop-k8s.svg) + +For the detailed design of Koordinator YARN Copilot and its use in the Xiaohongshu production environment, please refer to [Previous Articles](https://mp.weixin.qq.com/s/N0QEJYyOhoDZoVQ6hGhnmg) and [Official Community Documents](https://koordinator.sh/zh-Hans/docs/next/designs/koordinator-yarn). + +### 2. Introducing NUMA topology alignment strategy + +The workloads running in Kubernetes clusters are increasingly diverse, particularly in fields such as machine learning, where the demand for high-performance computing resources is on the rise. In these fields, a significant amount of CPU resources is required, as well as other high-speed computing resources like GPUs and RDMA. Moreover, to achieve optimal performance, these resources often need to be located on the same NUMA node or even the same PCIe bus. + +Kubernetes' kubelet includes a topology manager that manages the NUMA topology of resource allocation. It attempts to align the topologies of multiple resources at the node level during the admission phase. However, because the node component lacks a global view of the scheduler and the timing of node selection for pods, pods may be scheduled on nodes that are unable to meet the topology alignment policy. This can result in pods failing to start due to `topology affinity` errors. + +To solve this problem, Koordinator moves NUMA topology selection and alignment to the central scheduler, optimizing resource NUMA topology at the cluster level. In this release, Koordinator introduces NUMA-aware scheduling of CPU resources (including Batch resources) and NUMA-aware scheduling of GPU devices as alpha features. The entire suite of NUMA-aware scheduling features is rapidly evolving. + +Koordinator enables users to configure the NUMA topology alignment strategy for multiple resources on a node through the node's labels. The configurable strategies are as follows: + +- `None`, the default strategy, does not perform any topological alignment. +- `BestEffort` indicates that the node does not strictly allocate resources according to NUMA topology alignment. The scheduler can always allocate such nodes to pods as long as the remaining resources meet the pods' needs. +- `Restricted` means that nodes allocate resources in strict accordance with NUMA topology alignment. In other words, the scheduler must select the same one or more NUMA nodes when allocating multiple resources, otherwise, the node should not be considered. For instance, if a pod requests 33 CPU cores and each NUMA node has 32 cores, it can be allocated to use two NUMA nodes. However, if the pod also requests GPUs or RDMA, these must be on the same NUMA node as the CPU. +- `SingleNUMANode` is similar to `Restricted`, adhering strictly to NUMA topology alignment, but it differs in that `Restricted` permits the use of multiple NUMA nodes, whereas `SingleNUMANode` restricts allocation to a single NUMA node.
+ +For example, to set the `SingleNUMANode` policy for node-0, you would do the following: + +```yaml +apiVersion: v1 +kind: Node +metadata: + labels: + node.koordinator.sh/numa-topology-policy: "SingleNUMANode" + name: node-0 +spec: + ... +``` + +In a production environment, users may have enabled Kubelet's topology alignment policy, which will be reflected by the koordlet in the TopologyPolicies field of the NodeResourceTopology CR object. When Kubelet's policy conflicts with the policy set by the user on the node, the Kubelet policy shall take precedence. The Koordinator scheduler essentially adopts the same NUMA alignment policy semantics as the Kubelet Topology Manager. The Kubelet policies SingleNUMANodePodLevel and SingleNUMANodeContainerLevel are both mapped to SingleNUMANode. + +After configuring the NUMA alignment strategy for the node, the scheduler can identify many suitable NUMA node allocation results for each pod. Koordinator currently supports the NodeNUMAResource plugin, which allows for configuring the NUMA node allocation result scoring strategy for CPU and memory resources. This includes `LeastAllocated` and `MostAllocated` strategies, with `LeastAllocated` being the default. Each resource can also be assigned a configurable weight. The scheduler will ultimately select the NUMA node allocation with the highest score. For instance, we can configure the NUMA node allocation result scoring strategy to `MostAllocated`, as shown in the following example: + +```yaml +apiVersion: kubescheduler.config.k8s.io/v1beta2 +kind: KubeSchedulerConfiguration +profiles: + - pluginConfig: + - name: NodeNUMAResource + args: + apiVersion: kubescheduler.config.k8s.io/v1beta2 + kind: NodeNUMAResourceArgs + scoringStrategy: # Here configure Node level scoring strategy + type: MostAllocated + resources: + - name: cpu + weight: 1 + - name: memory + weight: 1 + - name: "kubernetes.io/batch-cpu" + weight: 1 + - name: "kubernetes.io/batch-memory" + weight: 1 + numaScoringStrategy: # Here configure NUMA-Node level scoring strategy + type: MostAllocated + resources: + - name: cpu + weight: 1 + - name: memory + weight: 1 + - name: "kubernetes.io/batch-cpu" + weight: 1 + - name: "kubernetes.io/batch-memory" + weight: 1 +``` + +### 3. ElasticQuota evolves again + +In order to fully utilize cluster resources and reduce management system costs, users often deploy workloads from multiple tenants in the same cluster. When cluster resources are limited, competition for these resources is inevitable between different tenants. As a result, the workloads of some tenants may always be satisfied, while others may never be executed, leading to demands for fairness. The quota mechanism is a very natural way to ensure fairness among tenants, where each tenant is allocated a specific quota, and they can use resources within that quota. Tasks exceeding the quota will not be scheduled or executed. However, simple quota management cannot fulfill tenants' expectations for elasticity in the cloud. Users hope that in addition to satisfying resource requests within the quota, requests for resources beyond the quota can also be met on demand. + +In previous versions, Koordinator leveraged the upstream ElasticQuota protocol, which allowed tenants to set a 'Min' value to express their resource requests that must be satisfied, and a 'Max' value to limit the maximum resources they can use. 'Max' was also used to represent the shared weight of the remaining resources of the cluster when they were insufficient.
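+
+As a reference, a minimal ElasticQuota that only declares 'Min' and 'Max' might look like the sketch below (the quota name and resource values are illustrative, not taken from a real cluster):
+
+```yaml
+apiVersion: scheduling.sigs.k8s.io/v1alpha1
+kind: ElasticQuota
+metadata:
+  name: quota-example
+  namespace: default
+spec:
+  # 'Min' is the amount of resources guaranteed to this tenant.
+  min:
+    cpu: 10
+    memory: 20Gi
+  # 'Max' caps the total resources the tenant may use and also acts as the
+  # shared weight when the remaining cluster resources are insufficient.
+  max:
+    cpu: 20
+    memory: 40Gi
+```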
+ +In addition to offering a flexible quota mechanism that accommodates tenants' on-demand resource requests, Koordinator enhances ElasticQuota with annotations to organize it into a tree structure, thereby simplifying the expression of hierarchical organizational structures for users. + +![img](/img/quotatree1.jpg) + +The figure above depicts a common quota structure tree in a cluster utilizing Koordinator's elastic quota. The root quota serves as the link between the quota system and the actual resources within the cluster. In previous iterations, the root quota existed only within the scheduler's logic. In this release, we have also made the root quota accessible to users in the form of a Custom Resource (CR). Users can now view information about the root quota through the ElasticQuota CR named 'koordinator-root-quota'. + +#### 3.1 Introducing Multi QuotaTree + +In large clusters, various types of nodes are present. For example, ECS VMs provided by cloud vendors come with different architectures, the most common being amd64 and arm64. Additionally, there are different models within the same architecture, and nodes typically have locational attributes such as availability zones. When nodes of different types are managed under the same quota tree, their unique attributes can be obscured. For users looking to manage the unique attributes of nodes, the current ElasticQuota may not be sufficiently accurate. To satisfy users' needs for flexible resource management and resource isolation, Koordinator enables the division of cluster resources into multiple parts. Each part is managed by its own quota tree, as illustrated in the following figure: + +![img](/img/multiquotatree.png) + +Additionally, to help users simplify management complexity, Koordinator introduced the ElasticQuotaProfile mechanism in version 1.4.0. Users can quickly associate nodes with different quota trees through the nodeSelector, as shown in the following example: + +```yaml +apiVersion: quota.koordinator.sh/v1alpha1 +kind: ElasticQuotaProfile +metadata: + labels: + kubernetes.io/arch: amd64 + name: amd64-profile + namespace: kube-system +spec: + nodeSelector: + matchLabels: + kubernetes.io/arch: amd64 # amd64 node + quotaName: amd64-root-quota # the name of root quota +--- +apiVersion: quota.koordinator.sh/v1alpha1 +kind: ElasticQuotaProfile +metadata: + labels: + kubernetes.io/arch: arm64 + name: arm64-profile + namespace: kube-system +spec: + nodeSelector: + matchLabels: + kubernetes.io/arch: arm64 # arm64 node + quotaName: arm64-root-quota # the name of root quota +``` + +After associating nodes with the quota tree, the user utilizes the same ElasticQuota in each quota tree as before.
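+
+For illustration, a child ElasticQuota attached to the amd64 quota tree could be declared as in the sketch below, reusing the quota-tree labels shown earlier in this document (the quota name and resource values are hypothetical):
+
+```yaml
+apiVersion: scheduling.sigs.k8s.io/v1alpha1
+kind: ElasticQuota
+metadata:
+  name: quota-team-a
+  namespace: default
+  labels:
+    # This quota is a leaf rather than a parent quota.
+    quota.scheduling.koordinator.sh/is-parent: "false"
+    # Attach the quota to the root quota of the amd64 quota tree defined above.
+    quota.scheduling.koordinator.sh/parent: "amd64-root-quota"
+spec:
+  min:
+    cpu: 10
+    memory: 20Gi
+  max:
+    cpu: 20
+    memory: 40Gi
+```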
When a user submits a pod to the corresponding quota, they currently still need to configure the pod's NodeAffinity to ensure that the pod runs on the correct node. In the future, we plan to add a feature that will help users automatically manage the mapping relationship from quota to node. + +#### 3.2 Support non-preemptible + +Koordinator ElasticQuota supports sharing the unused part of 'Min' in ElasticQuota with other ElasticQuotas to improve resource utilization. However, when resources are tight, the pod that borrows the quota will be preempted and evicted through the preemption mechanism to get the resources back. + +In actual production environments, if some critical online services borrow this part of the quota from other ElasticQuotas and preemption subsequently occurs, the quality of service may be adversely affected. Such workloads should not be subject to preemption. + +To implement this safeguard, Koordinator v1.4.0 introduced a new API. Users can simply add the label `quota.scheduling.koordinator.sh/preemptible: false` to a pod to indicate that the pod should not be preempted. + +When the scheduler detects that a pod is declared non-preemptible, it ensures that the available quota for such a pod does not exceed its 'Min'. Thus, it is important to note that when enabling this feature, the 'Min' of an ElasticQuota should be set judiciously, and the cluster must have appropriate resource guarantees in place. This feature maintains compatibility with the original behavior of Koordinator. + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example + namespace: default + labels: + quota.scheduling.koordinator.sh/name: "quota-example" + quota.scheduling.koordinator.sh/preemptible: "false" +spec: +... +``` + +#### 3.3 Other improvements + +1. The Koordinator Scheduler previously supported the use of a single ElasticQuota object across multiple namespaces. However, in some cases, it is desirable for the same object to be shared by only a select few namespaces. To accommodate this need, users can now annotate the ElasticQuota CR with `quota.scheduling.koordinator.sh/namespaces`, assigning a JSON string array as the value. +2. Performance optimization: Previously, whenever an ElasticQuota was modified, the ElasticQuota plugin would rebuild the entire QuotaTree. This process has been optimized in version 1.4.0. +3. Support ignoring overhead: When a pod utilizes secure containers, an overhead declaration is typically added to the pod specification to account for the resource consumption of the secure container itself. However, whether these additional resource costs should be passed on to the end user depends on the resource pricing strategy. If it is expected that users should not be responsible for these costs, the ElasticQuota can be configured to disregard overhead. With version 1.4.0, this can be achieved by enabling the feature gate ElasticQuotaIgnorePodOverhead. + +### 4. CPU normalization + +With the diversification of node hardware in Kubernetes clusters, significant performance differences exist between CPUs of various architectures and generations. Therefore, even if a pod's CPU request is identical, the actual computing power it receives can vary greatly, potentially leading to resource waste or diminished application performance. The objective of CPU normalization is to ensure that each CPU unit in Kubernetes provides consistent computing power across heterogeneous nodes by standardizing the performance of allocatable CPUs.
+ +To address this issue, Koordinator has implemented a CPU normalization mechanism in version 1.4.0. This mechanism adjusts the amount of CPU resources that can be allocated on a node according to the node's resource amplification strategy, ensuring that each allocatable CPU in the cluster delivers a consistent level of computing power. The overall architecture is depicted in the figure below: + +![img](/img/cpu-normalization.svg) + +CPU normalization consists of two steps: + +1. CPU performance evaluation: To calculate the performance benchmarks of different CPUs, you can refer to the industrial performance evaluation standard, [SPEC CPU](https://www.spec.org/cpu2017/). This part is not provided by the Koordinator project. +2. Configuration of the CPU normalization ratio in Koordinator: The scheduling system schedules resources based on the normalization ratio, which is provided by Koordinator. + +Configure the CPU normalization ratio information into slo-controller-config of koord-manager. The configuration example is as follows: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: koordinator-system +data: + cpu-normalization-config: | + { + "enable": true, + "ratioModel": { + "Intel(R) Xeon(R) Platinum 8269CY CPU @ 2.50GHz": { + "baseRatio": 1.29, + "hyperThreadEnabledRatio": 0.82, + "turboEnabledRatio": 1.52, + "hyperThreadTurboEnabledRatio": 1.0 + }, + "Intel Xeon Platinum 8369B CPU @ 2.90GHz": { + "baseRatio": 1.69, + "hyperThreadEnabledRatio": 1.06, + "turboEnabledRatio": 1.91, + "hyperThreadTurboEnabledRatio": 1.20 + } + } + } + # ... +``` + +For nodes configured with CPU normalization, Koordinator intercepts updates to Node.Status.Allocatable by Kubelet through a webhook to achieve the amplification of CPU resources. This results in the display of the normalized amount of CPU resources available for allocation on the node. + +### 5. Improved descheduling protection strategies + +Pod migration is a complex process that involves steps such as auditing, resource allocation, and application startup. It is often intertwined with application upgrades, scaling scenarios, and the resource operations and maintenance performed by cluster administrators. Consequently, if a large number of pods are migrated simultaneously, the system's stability may be compromised. Furthermore, migrating many pods from the same workload at once can also affect the application's stability. Additionally, simultaneous migrations of pods from multiple jobs may lead to a 'thundering herd' effect. Therefore, it is preferable to process the pods in each job sequentially. + +To address these issues, Koordinator previously provided the PodMigrationJob function with some protection strategies. In version v1.4.0, Koordinator has enhanced these protection strategies into an arbitration mechanism. When there are a large number of executable PodMigrationJobs, the arbiter decides which ones can proceed by employing sorting and filtering techniques. + +The sorting process is as follows: +- The shorter the time interval between the start of the migration and the current time, the higher the PodMigrationJob is ranked. +- The lower the priority of the PodMigrationJob's pod, the higher the PodMigrationJob is ranked. +- PodMigrationJobs are dispersed by workload, while those belonging to the same job are kept adjacent in the queue. +- If some pods of the job that contains the PodMigrationJob's pod are already being migrated, that PodMigrationJob is ranked higher.
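+
+For reference, each object being arbitrated here is a PodMigrationJob CR that points at the pod to be migrated. A minimal sketch is shown below; the field names are based on the PodMigrationJob API (scheduling.koordinator.sh/v1alpha1), and the concrete values are illustrative:
+
+```yaml
+apiVersion: scheduling.koordinator.sh/v1alpha1
+kind: PodMigrationJob
+metadata:
+  name: migrationjob-demo
+spec:
+  paused: false
+  # Give up the migration if it cannot finish within the TTL.
+  ttl: 5m
+  # Reserve resources first, then evict the pod once the reservation is ready.
+  mode: ReservationFirst
+  podRef:
+    namespace: default
+    name: pod-demo-5f9b977566-c7lvk
+```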
+ +The filtering process is as follows: +- Group and filter PodMigrationJobs based on workload, node, namespace, etc. +- Check the number of running PodMigrationJobs in each workload; workloads that reach a certain threshold are excluded. +- Check whether the number of unavailable replicas in each workload exceeds the maximum number of unavailable replicas; workloads that exceed it are excluded. +- Check whether the number of pods being migrated on the node where the target pod is located exceeds the maximum number of migrations for a single node; nodes that exceed it are excluded. + +### 6. Cold Memory Reporting + +To improve system performance, the kernel generally tries not to free the page cache requested by an application but allocates as much as possible to the application. Although allocated by the kernel, this memory may no longer be accessed by applications and is referred to as cold memory. + +Koordinator introduced the cold memory reporting function in version 1.4, primarily to lay the groundwork for future cold memory recycling capabilities. Cold memory recycling is designed to address two scenarios: + +1. In standard Kubernetes clusters, when the node memory level is too high, sudden memory requests can lead to direct memory recycling of the system. This can affect the performance of running containers and, in extreme cases, may result in out-of-memory (OOM) events if recycling is not timely. Therefore, maintaining a relatively free pool of node memory resources is crucial for runtime stability. +2. In co-location scenarios, high-priority applications' unused requested resources can be recycled by lower-priority applications. Since memory not reclaimed by the operating system is invisible to the Koordinator scheduling system, reclaiming unused memory pages of a container is beneficial for improving resource utilization. + +Koordlet has added a cold page collector to its collector plugins for reading the cgroup file memory.idle_stat, which is exported by kidled (Anolis kernel), kstaled (Google), or DAMON (Amazon). This file contains information about cold pages in the page cache and is present at every hierarchy level of memory. Koordlet already supports the kidled cold page collector and provides interfaces for other cold page collectors. + +After collecting cold page information, the cold page collector stores the metrics, such as hot page usage and cold page size for nodes, pods, and containers, into metriccache. This data is then reported to the NodeMetric Custom Resource (CR). + +Users can enable cold memory recycling and configure cold memory collection strategies through NodeMetric. Currently, three strategies are offered: usageWithHotPageCache, usageWithoutPageCache, and usageWithPageCache. For more details, please see the community [Design Document](https://github.com/koordinator-sh/koordinator/blob/main/docs/proposals/koordlet/20230728-support-cold-memory-compute.md). + +### 7. QoS management for non-containerized applications + +In the process of enterprise containerization, there may be non-containerized applications running on the host alongside those already running on Kubernetes. To better support enterprises during this containerization process, Koordinator has developed a node resource reservation mechanism. This mechanism can reserve resources and assign specific QoS (Quality of Service) levels to applications that have not yet been containerized.
Unlike the resource reservation configuration provided by Kubelet, Koordinator's primary goal is to address QoS issues that arise during the runtime of both non-containerized and containerized applications. The overall solution is depicted in the figure below: + +![img](/img/host-application.svg) + +Currently, applications need to start processes into the corresponding cgroup according to specifications, and Koordinator does not provide an automatic cgroup relocation tool. For host non-containerized applications, QoS is supported as follows: + +- LS (Latency Sensitive) +  - CPU QoS (Group Identity): The application runs the process in the CPU subsystem of the cgroup according to the specification, and the koordlet sets the Group Identity parameter for it according to the CPU QoS configuration resource-qos-config; +  - CPUSet Allocation: The application runs the process in the CPU subsystem of the cgroup according to the specification, and the koordlet will set all CPU cores in the CPU share pool for it. + +- BE (Best-effort) + +  - CPU QoS (Group Identity): The application runs the process in the CPU subsystem of the cgroup according to the specification, and the koordlet sets the Group Identity parameter for it according to the configuration of CPU QoS. + +For the detailed design of QoS management for non-containerized applications on the host, please refer to [Community Documentation](https://koordinator.sh/zh-Hans/docs/next/user-manuals/host-application-qos). In the future, we will gradually add support for other QoS strategies for host non-containerized applications. + +### 8. Other features + +In addition to the new features and functional enhancements mentioned above, Koordinator has also implemented the following bug fixes and optimizations in version 1.4.0: + +1. RequiredCPUBindPolicy: Fine-grained CPU orchestration now supports the configuration of the Required CPU binding policy, which means that CPUs are allocated strictly in accordance with the specified CPU binding policy; otherwise, scheduling will fail. +2. CICD: The Koordinator community provides a set of e2e testing pipelines in v1.4.0, and an ARM64 image is provided. +3. Batch resource calculation strategy optimization: There is support for the maxUsageRequest calculation strategy, which conservatively reclaims high-priority resources. This update also optimizes the underestimate of Batch allocatable when a large number of pods start and stop on a node in a short period of time and improves considerations for special circumstances such as hostApplication, third-party allocatable, and dangling pod usage. +4. Others: Optimizations include using libpfm4 and perf groups to improve CPI collection, allowing SystemResourceCollector to support customized expiration time configuration, enabling BE pods to calculate CPU satisfaction based on the evictByAllocatable policy, repairing Koordlet's CPUSetAllocator filtering logic for pods with LS and None QoS, and enhancing RDT resource control to retrieve the task IDs of sandbox containers. + +For a comprehensive list of new features in version 1.4.0, please visit the [v1.4.0 Release](https://github.com/koordinator-sh/koordinator/releases/tag/v1.4.0) page. + +## Future plan + +In upcoming versions, Koordinator has planned the following features: + +- Core Scheduling: On the runtime side, Koordinator has begun exploring the next generation of CPU QoS capabilities.
By leveraging kernel mechanisms such as Linux Core Scheduling, it aims to enhance resource isolation at the physical core level and reduce the security risks associated with co-location. For more details on this work, see [Issue #1728](https://github.com/koordinator-sh/koordinator/issues/1728). +- Joint Allocation Of Devices: In scenarios involving AI large model distributed training, GPUs from different machines often need to communicate through high-performance network cards. Performance is improved when GPUs and high-performance network cards are allocated in close proximity. Koordinator is advancing the joint allocation of multiple heterogeneous resources. Currently, it supports joint allocation in terms of protocol and scheduler logic; the reporting logic for network card resources on the node side is being explored. + +For more information, please pay attention to [Milestone v1.5.0](https://github.com/koordinator-sh/koordinator/milestone/14). + +## Conclusion + +Finally, we are immensely grateful to all the contributors and users of the Koordinator community. Your active participation and valuable advice have enabled Koordinator to continue improving. We eagerly look forward to your ongoing feedback and warmly welcome new contributors to join our ranks. diff --git a/blog/authors.yml b/blog/authors.yml index 8adb5c242..820946ebe 100644 --- a/blog/authors.yml +++ b/blog/authors.yml @@ -39,3 +39,9 @@ saintube: title: Koordinator member url: https://github.com/saintube image_url: https://github.com/saintube.png + +ZiMengSheng: + name: Jianyu Wang + title: Koordinator member + url: https://github.com/ZiMengSheng + image_url: https://github.com/ZiMengSheng.png diff --git a/docs/installation.md b/docs/installation.md index c960151d1..180f5181c 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -20,7 +20,7 @@ $ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/ $ helm repo update # Install the latest version. -$ helm install koordinator koordinator-sh/koordinator --version 1.3.0 +$ helm install koordinator koordinator-sh/koordinator --version 1.4.0 ``` ## Upgrade with helm @@ -33,7 +33,7 @@ $ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/ $ helm repo update # Upgrade the latest version. -$ helm upgrade koordinator koordinator-sh/koordinator --version 1.3.0 [--force] +$ helm upgrade koordinator koordinator-sh/koordinator --version 1.4.0 [--force] ``` Note that: @@ -57,7 +57,7 @@ $ helm install/upgrade koordinator /PATH/TO/CHART ### Prerequisite - Containerd >= 1.7.0 and enable NRI. Please make sure NRI is enabled in containerd.
If not, please refer to [Enable NRI in Containerd](https://github.com/containerd/containerd/blob/main/docs/NRI.md) -- Koordinator >= 1.3 +- Koordinator >= 1.4 ### Configurations @@ -82,7 +82,7 @@ The following table lists the configurable parameters of the chart and their def | `manager.log.level` | Log level that koord-manager printed | `4` | | `manager.replicas` | Replicas of koord-manager deployment | `2` | | `manager.image.repository` | Repository for koord-manager image | `koordinatorsh/koord-manager` | -| `manager.image.tag` | Tag for koord-manager image | `v1.3.0` | +| `manager.image.tag` | Tag for koord-manager image | `v1.4.0` | | `manager.resources.limits.cpu` | CPU resource limit of koord-manager container | `1000m` | | `manager.resources.limits.memory` | Memory resource limit of koord-manager container | `1Gi` | | `manager.resources.requests.cpu` | CPU resource request of koord-manager container | `500m` | @@ -97,7 +97,7 @@ The following table lists the configurable parameters of the chart and their def | `scheduler.log.level` | Log level that koord-scheduler printed | `4` | | `scheduler.replicas` | Replicas of koord-scheduler deployment | `2` | | `scheduler.image.repository` | Repository for koord-scheduler image | `koordinatorsh/koord-scheduler` | -| `scheduler.image.tag` | Tag for koord-scheduler image | `v1.3.0` | +| `scheduler.image.tag` | Tag for koord-scheduler image | `v1.4.0` | | `scheduler.resources.limits.cpu` | CPU resource limit of koord-scheduler container | `1000m` | | `scheduler.resources.limits.memory` | Memory resource limit of koord-scheduler container | `1Gi` | | `scheduler.resources.requests.cpu` | CPU resource request of koord-scheduler container | `500m` | @@ -109,7 +109,7 @@ The following table lists the configurable parameters of the chart and their def | `scheduler.hostNetwork` | Whether koord-scheduler pod should run with hostnetwork | `false` | | `koordlet.log.level` | Log level that koordlet printed | `4` | | `koordlet.image.repository` | Repository for koordlet image | `koordinatorsh/koordlet` | -| `koordlet.image.tag` | Tag for koordlet image | `v1.3.0` | +| `koordlet.image.tag` | Tag for koordlet image | `v1.4.0` | | `koordlet.resources.limits.cpu` | CPU resource limit of koordlet container | `500m` | | `koordlet.resources.limits.memory` | Memory resource limit of koordlet container | `256Mi` | | `koordlet.resources.requests.cpu` | CPU resource request of koordlet container | `0` | diff --git a/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-03-31-release/index.md b/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-03-31-release/index.md new file mode 100644 index 000000000..47880b3f0 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-03-31-release/index.md @@ -0,0 +1,152 @@ +--- +slug: release-v0.1.0 +title: Koordinator v0.1.0 - QoS based scheduling system +authors: [joseph, hormes] +tags: [koordinator, colocation, kubernetes, scheduling, orchestration, release] +--- + + +We’re pleased to announce the release of Koordinator v0.1.0. + +## Overview +Koordinator is a QoS-based scheduling for efficient orchestration of microservices, AI, and big data workloads on Kubernetes. It aims to improve the runtime efficiency and reliability of both latency sensitive workloads and batch jobs, simplify the complexity of resource-related configuration tuning, and increase pod deployment density to improve resource utilizations. 
+ +## Key Features +Koordinator enhances the Kubernetes user experience in workload management by providing the following: + +- Well-designed [priority](/docs/architecture/priority) and [QoS](/docs/architecture/qos) mechanism to co-locate different types of workloads in a cluster and run different types of pods on a single node. +- Allowing for resource overcommitments to achieve high resource utilizations but still satisfying the QoS guarantees by leveraging an application profiling mechanism. +- Fine-grained resource orchestration and isolation mechanism to improve the efficiency of latency-sensitive workloads and batch jobs. +- Flexible job scheduling mechanism to support workloads in specific areas, e.g., big data, AI, audio and video. +- A set of tools for monitoring, troubleshooting and operations. + +## Node Metrics + +Koordinator defines the `NodeMetrics` CRD, which is used to record the resource utilization of a single node and all Pods on the node. koordlet will regularly report and update `NodeMetrics`. You can view `NodeMetrics` with the following command. + +```shell +$ kubectl get nodemetrics node-1 -o yaml +apiVersion: slo.koordinator.sh/v1alpha1 +kind: NodeMetric +metadata: + creationTimestamp: "2022-03-30T11:50:17Z" + generation: 1 + name: node-1 + resourceVersion: "2687986" + uid: 1567bb4b-87a7-4273-a8fd-f44125c62b80 +spec: {} +status: + nodeMetric: + nodeUsage: + resources: + cpu: 138m + memory: "1815637738" + podsMetric: + - name: storage-service-6c7c59f868-k72r5 + namespace: default + podUsage: + resources: + cpu: "300m" + memory: 17828Ki +``` + +## Colocation Resources + +After Koordinator is deployed in the K8s cluster, it will calculate the CPU and Memory resources that have been allocated but not used according to the data of `NodeMetrics`. These resources are updated in Node in the form of extended resources. + +`koordinator.sh/batch-cpu` represents the CPU resources for Best Effort workloads, +`koordinator.sh/batch-memory` represents the Memory resources for Best Effort workloads. + +You can view these resources with the following command. + +```shell +$ kubectl describe node node-1 +Name: node-1 +.... +Capacity: + cpu: 8 + ephemeral-storage: 103080204Ki + koordinator.sh/batch-cpu: 4541 + koordinator.sh/batch-memory: 17236565027 + memory: 32611012Ki + pods: 64 +Allocatable: + cpu: 7800m + ephemeral-storage: 94998715850 + koordinator.sh/batch-cpu: 4541 + koordinator.sh/batch-memory: 17236565027 + memory: 28629700Ki + pods: 64 +``` + + +## Cluster-level Colocation Profile + +In order to make it easier for everyone to use Koordinator to co-locate different workloads, we defined `ClusterColocationProfile` to help gray workloads use co-location resources. A `ClusterColocationProfile` is a CRD like the one below. Please do edit each parameter to fit your own use cases.
+ +```yaml +apiVersion: config.koordinator.sh/v1alpha1 +kind: ClusterColocationProfile +metadata: + name: colocation-profile-example +spec: + namespaceSelector: + matchLabels: + koordinator.sh/enable-colocation: "true" + selector: + matchLabels: + sparkoperator.k8s.io/launched-by-spark-operator: "true" + qosClass: BE + priorityClassName: koord-batch + koordinatorPriority: 1000 + schedulerName: koord-scheduler + labels: + koordinator.sh/mutated: "true" + annotations: + koordinator.sh/intercepted: "true" + patch: + spec: + terminationGracePeriodSeconds: 30 +``` + +Various Koordinator components ensure scheduling and runtime quality through labels `koordinator.sh/qosClass`, `koordinator.sh/priority` and kubernetes native priority. + +With the webhook mutating mechanism provided by Kubernetes, koord-manager will modify Pod resource requirements to co-located resources, and inject the QoS and Priority defined by Koordinator into Pod. + +Taking the above Profile as an example, when the Spark Operator creates a new Pod in the namespace with the `koordinator.sh/enable-colocation=true` label, the Koordinator QoS label `koordinator.sh/qosClass` will be injected into the Pod. According to the Profile definition PriorityClassName, modify the Pod's PriorityClassName and the corresponding Priority value. Users can also set the Koordinator Priority according to their needs to achieve more fine-grained priority management, so the Koordinator Priority label `koordinator.sh/priority` is also injected into the Pod. Koordinator provides an enhanced scheduler koord-scheduler, so you need to modify the Pod's scheduler name koord-scheduler through Profile. + +If you expect to integrate Koordinator into your own system, please learn more about the [core concepts](/docs/architecture/overview). + +## CPU Suppress + +In order to ensure the runtime quality of different workloads in co-located scenarios, Koordinator uses the CPU Suppress mechanism provided by koordlet on the node side to suppress workloads of the Best Effort type when the load increases. Or increase the resource quota for Best Effort type workloads when the load decreases. + +When installing through the helm chart, the ConfigMap `slo-controller-config` will be created in the koordinator-system namespace, and the CPU Suppress mechanism is enabled by default. If it needs to be closed, refer to the configuration below, and modify the configuration of the resource-threshold-config section to take effect. + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: {{ .Values.installation.namespace }} +data: + ... + resource-threshold-config: | + { + "clusterStrategy": { + "enable": false + } + } +``` + +## Colocation Resources Balance +Koordinator currently adopts a strategy for node co-location resource scheduling, which prioritizes scheduling to machines with more resources remaining in co-location to avoid Best Effort workloads crowding together. More rich scheduling capabilities are on the way. + +## Tutorial - Colocation of Spark Jobs + +Apache Spark is an analysis engine for large-scale data processing, which is widely used in Big Data, SQL Analysis and Machine Learning scenarios. +We provide a tutorial to help you how to quickly use Koordinator to run Spark Jobs in colocation mode with other latency sensitive applications. For more details, please refer to the [tutorial](/docs/best-practices/colocation-of-spark-jobs). + +## Summary + +Fore More details, please refer to the [Documentation](/docs). Hope it helps! 
diff --git a/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-04-19-release/index.md b/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-04-19-release/index.md new file mode 100644 index 000000000..d56ee64f9 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-04-19-release/index.md @@ -0,0 +1,98 @@ +--- +slug: release-v0.2.0 +title: Koordinator v0.2.0 - Enhanced node-side scheduling capabilities +authors: [joseph] +tags: [koordinator, colocation, kubernetes, scheduling, orchestration, release] +--- + + +We’re pleased to announce the release of Koordinator v0.2.0. + +## Overview + +Koordinator v0.1.0 implements basic co-location scheduling capabilities, and after the project was released, it has received attention and positive responses from the community. +For some issues that everyone cares about, such as how to isolate resources for best-effort workloads, how to ensure the runtime stability of latency-sensitiv applications in co-location scenarios, etc., we have enhanced node-side scheduling capabilities in koordinator v0.2.0 to solve these problems. + +## Install or Upgrade to Koordinator v0.2.0 + +### Install with helms + +Koordinator can be simply installed by helm v3.5+, which is a simple command-line tool and you can get it from [here](https://github.com/helm/helm/releases). + +```shell +# Firstly add koordinator charts repository if you haven't do this. +$ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/ + +# [Optional] +$ helm repo update + +# Install the latest version. +$ helm install koordinator koordinator-sh/koordinator --version 0.2.0 +``` + +### Upgrade with helm + +```shell +# Firstly add koordinator charts repository if you haven't do this. +$ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/ + +# [Optional] +$ helm repo update + +# Upgrade the latest version. +$ helm upgrade koordinator koordinator-sh/koordinator --version 0.2.0 [--force] +``` + +For more details, please refer to the [installation manual](/docs/installation). + +## Isolate resources for best-effort workloads + +In Koodinator v0.2.0, we refined the ability to isolate resources for best-effort worklods. + +`koordlet` will set the cgroup parameters according to the resources described in the Pod Spec. Currently supports setting CPU Request/Limit, and Memory Limit. + +For CPU resources, only the case of `request == limit` is supported, and the support for the scenario of `request <= limit` will be supported in the next version. + +## Active eviction mechanism based on memory safety thresholds + +When latency-sensitiv applications are serving, memory usage may increase due to bursty traffic. Similarly, there may be similar scenarios for best-effort workloads, for example, the current computing load exceeds the expected resource Request/Limit. + +These scenarios will lead to an increase in the overall memory usage of the node, which will have an unpredictable impact on the runtime stability of the node side. For example, it can reduce the quality of service of latency-sensitiv applications or even become unavailable. Especially in a co-location environment, it is more challenging. + +We implemented an active eviction mechanism based on memory safety thresholds in Koodinator. + +`koordlet` will regularly check the recent memory usage of node and Pods to check whether the safty threshold is exceeded. If it exceeds, it will evict some best-effort Pods to release memory. 
This mechanism can better ensure the stability of node and latency-sensitiv applications. + +`koordlet` currently only evicts best-effort Pods, sorted according to the Priority specified in the Pod Spec. The lower the priority, the higher the priority to be evicted, the same priority will be sorted according to the memory usage rate (RSS), the higher the memory usage, the higher the priority to be evicted. This eviction selection algorithm is not static. More dimensions will be considered in the future, and more refined implementations will be implemented for more scenarios to achieve more reasonable evictions. + +The current memory utilization safety threshold default value is 70%. You can modify the `memoryEvictThresholdPercent` in ConfigMap `slo-controller-config` according to the actual situation, + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: koordinator-system +data: + colocation-config: | + { + "enable": true + } + resource-threshold-config: | + { + "clusterStrategy": { + "enable": true, + "memoryEvictThresholdPercent": 70 + } + } +``` + +## CPU Burst - Improve the performance of latency-sensitive applications + +CPU Burst is a service level objective (SLO)-aware resource scheduling feature. You can use CPU Burst to improve the performance of latency-sensitive applications. CPU scheduling for a container may be throttled by the kernel due to the CPU limit, which downgrades the performance of the application. Koordinator automatically detects CPU throttling events and automatically adjusts the CPU limit to a proper value. This greatly improves the performance of latency-sensitive applications. + +The code of CPU Burst has been developed and is still under review and testing. It will be released in the next version. If you want to use this ability early, you are welcome to participate in Koordiantor and improve it together. For more details, please refer to the PR [#73](https://github.com/koordinator-sh/koordinator/pull/73). + +## More + +For more details, please refer to the [Documentation](/docs). Hope it helps! diff --git a/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-05-07-release/index.md b/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-05-07-release/index.md new file mode 100644 index 000000000..82a81d19a --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-05-07-release/index.md @@ -0,0 +1,256 @@ +--- +slug: release-v0.3.0 +title: What's New in Koordinator v0.3.0? +authors: [jason] +tags: [koordinator, colocation, kubernetes, scheduling, orchestration, release] +--- +We are happy to announce the v0.3.0 release of **Koordinator**. After starting small and learning what users needed, we +are able to adjust its path and develop features needed for a stable community release. + +The release of Koordinator v0.3.0 brings in some notable changes that are most wanted by the community while continuing +to expand on experimental features. + +## Install or Upgrade to Koordinator v0.3.0 + +### Install with helms + +Koordinator can be simply installed by helm v3.5+, which is a simple command-line tool, and you can get it +from [here](https://github.com/helm/helm/releases). + +```shell +# Firstly add koordinator charts repository if you haven't do this. +$ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/ + +# [Optional] +$ helm repo update + +# Install the latest version. 
+$ helm install koordinator koordinator-sh/koordinator --version 0.3.0 +``` + +### Upgrade with helm + +```shell +# Firstly add koordinator charts repository if you haven't do this. +$ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/ + +# [Optional] +$ helm repo update + +# Upgrade the latest version. +$ helm upgrade koordinator koordinator-sh/koordinator --version 0.3.0 [--force] +``` + +For more details, please refer to the [installation manual](/docs/installation). + +## CPU Burst + +CPU Burst is a service level objective (SLO)-aware resource scheduling feature provided by Koordinator. You can use CPU +Burst to improve the performance of latency-sensitive applications. CPU scheduling for +a container may be throttled by the kernel due to the CPU limit, which downgrades the performance of the application. +Koordlet automatically detects CPU throttling events and automatically adjusts the CPU limit to a +proper value. This greatly improves the performance of latency-sensitive applications. + +### How CPU Burst works + +Kubernetes allows you to specify CPU limits, which can be reused based on time-sharing. If you specify a CPU limit for a +container, the OS limits the amount of CPU resources that can be used by the container within a specific time period. +For example, you set the CPU limit of a container to 2. The OS kernel limits the CPU time slices that the container can +use to 200 milliseconds within each 100-millisecond period. + +CPU utilization is a key metric that is used to evaluate the performance of a container. In most cases, the CPU limit is +specified based on CPU utilization. CPU utilization on a per-millisecond basis shows more spikes than on a per-second +basis. If the CPU utilization of a container reaches the limit within a 100-millisecond period, CPU throttling is +enforced by the OS kernel and threads in the container are suspended for the rest of the time period. + +### How to use CPU Burst + +- Use an annotation to enable CPU Burst + + Add the following annotation to the pod configuration to enable CPU Burst: + +```yaml +annotations: + # Set the value to auto to enable CPU Burst for the pod. + koordinator.sh/cpuBurst: '{"policy": "auto"}' + # To disable CPU Burst for the pod, set the value to none. + #koordinator.sh/cpuBurst: '{"policy": "none"}' +``` + +- Use a ConfigMap to enable CPU Burst for all pods in a cluster + + Modify the slo-controller-config ConfigMap based on the + following content to enable CPU Burst for all pods in a cluster: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: koordinator-system +data: + cpu-burst-config: '{"clusterStrategy": {"policy": "auto"}}' + #cpu-burst-config: '{"clusterStrategy": {"policy": "cpuBurstOnly"}}' + #cpu-burst-config: '{"clusterStrategy": {"policy": "none"}}' +``` + +- Advanced configurations + + The following code block shows the pod annotations and ConfigMap fields that you can use for advanced configurations: + +```yaml +# Example of the slo-controller-config ConfigMap. +data: + cpu-burst-config: | + { + "clusterStrategy": { + "policy": "auto", + "cpuBurstPercent": 1000, + "cfsQuotaBurstPercent": 300, + "sharePoolThresholdPercent": 50, + "cfsQuotaBurstPeriodSeconds": -1 + } + } + + # Example of pod annotations. 
+ koordinator.sh/cpuBurst: '{"policy": "auto", "cpuBurstPercent": 1000, "cfsQuotaBurstPercent": 300, "cfsQuotaBurstPeriodSeconds": -1}' +``` + +The following table describes the ConfigMap fields that you can use for advanced configurations of CPU Burst. + +| Field | Data type | Description | +|----------------------------|-----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| policy | string | | +| cpuBurstPercent | int | Default value:`1000`. Unit: %. This field specifies the percentage to which the CPU limit can be increased by CPU Burst. If the CPU limit is set to `1`, CPU Burst can increase the limit to 10 by default. | +| cfsQuotaBurstPercent | int | Default value: `300`. Unit: %. This field specifies the maximum percentage to which the value of cfs_quota in the cgroup parameters can be increased. By default, the value of cfs_quota can be increased to at most three times. | +| cfsQuotaBurstPeriodSeconds | int | Default value: `-1`. Unit: seconds. This indicates that the time period in which the container can run with an increased CFS quota is unlimited. This field specifies the time period in which the container can run with an increased CFS quota, which cannot exceed the upper limit specified by `cfsQuotaBurstPercent`. | +| sharePoolThresholdPercent | int | Default value: `50`. Unit: %. This field specifies the CPU utilization threshold of the node. If the CPU utilization of the node exceeds the threshold, the value of cfs_quota in cgroup parameters is reset to the original value. | + +## L3 cache and MBA resource isolation + +Pods of different priorities are usually deployed on the same machine. This may cause pods to compete for computing +resources. As a result, the quality of service (QoS) of your service cannot be ensured. The Resource Director +Technology (RDT) controls the Last Level Cache (L3 cache) that can be used by workloads of different priorities. RDT +also uses the Memory Bandwidth Allocation (MBA) feature to control the memory bandwidth that can be used by workloads. +This isolates the L3 cache and memory bandwidth used by workloads, ensures the QoS of high-priority workloads, and +improves overall resource utilization. This topic describes how to improve the resource isolation of pods with +different priorities by controlling the L3 cache and using the MBA feature. + +### How to use L3 cache and MBA resource isolation + +- Use a ConfigMap to enable L3 cache and MBA resource isolation for all pods in a cluster + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: koordinator-system +data: + resource-qos-config: |- + { + "clusterStrategy": { + "lsClass": { + "resctrlQOS": { + "enable": true, + "catRangeStartPercent": 0, + "catRangeEndPercent": 100, + "MBAPercent": 100 + } + }, + "beClass": { + "resctrlQOS": { + "enable": true, + "catRangeStartPercent": 0, + "catRangeEndPercent": 30, + "MBAPercent": 100 + } + } + } + } +``` + +## Memory QoS + +The Koordlet provides the memory quality of service (QoS) feature for containers. 
You can use this +feature to optimize the performance of memory-sensitive applications while ensuring fair memory scheduling among +containers. This topic describes how to enable the memory QoS feature for containers. + +### Background information + +The following memory limits apply to containers: + +- The memory limit of the container. If the amount of memory that a container uses, including the page cache, is about + to reach the memory limit of the container, the memory reclaim mechanism of the OS kernel is triggered. As a result, + the application in the container may not be able to request or release memory resources as normal. +- The memory limit of the node. If the memory limit of a container is greater than the memory request of the container, + the container can overcommit memory resources. In this case, the available memory on the node may become insufficient. + This causes the OS kernel to reclaim memory from containers. As a result, the performance of your application is + downgraded. In extreme cases, the node cannot run as normal. + +To improve the performance of applications and the stability of nodes, Koordinator provides the memory QoS feature for +containers. We recommend that you use Anolis OS as the node OS. For other OS, we will try our best to adapt, and users +can still enable it without side effects. After you enable the memory QoS feature for a container, Koordlet +automatically configures the memory control group (memcg) based on the configuration of the container. This helps you +optimize the performance of memory-sensitive applications while ensuring fair memory scheduling on the node. + +### How to use Memory QoS + +When you enable memory QoS for the containers in a pod, the memcg is automatically configured based on the specified +ratios and pod parameters. To enable memory QoS for the containers in a pod, perform the following steps: + +1. Add the following annotations to enable memory QoS for the containers in a pod: + +```yaml +annotations: + # To enable memory QoS for the containers in a pod, set the value to auto. + koordinator.sh/memoryQOS: '{"policy": "auto"}' + # To disable memory QoS for the containers in a pod, set the value to none. + #koordinator.sh/memoryQOS: '{"policy": "none"}' +``` + +2. Use a ConfigMap to enable memory QoS for all the containers in a cluster. + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: koordinator-system +data: + resource-qos-config: |- + { + "clusterStrategy": { + "lsClass": { + "memoryQOS": { + "enable": true + } + }, + "beClass": { + "memoryQOS": { + "enable": true + } + } + } + } +``` + +3. Optional. Configure advanced parameters. + + The following table describes the advanced parameters that you can use to configure fine-grained memory QoS + configurations at the pod level and cluster level. 
+
+| Parameter | Data type | Valid value | Description |
+| ------------------- | ----------- | ----------- | ----------- |
+| enable | Boolean | true / false | Cluster-level switch used in the `slo-controller-config` ConfigMap shown above. Specifies whether memory QoS is enabled for the containers of the corresponding QoS class. |
+| policy | String | auto / none | Pod-level switch used in the `koordinator.sh/memoryQOS` annotation shown above. `auto` enables memory QoS for the containers in the pod with the recommended parameters, and `none` disables it. |
+| minLimitPercent | Int | 0~100 | Unit: %. Default value: `0`. The default value indicates that this parameter is disabled. This parameter specifies the unreclaimable proportion of the memory request of a pod. The amount of unreclaimable memory is calculated based on the following formula: `Value of memory.min = Memory request × Value of minLimitPercent/100`. This parameter is suitable for scenarios where applications are sensitive to the page cache. You can use this parameter to cache files to optimize read and write performance. For example, if you specify `Memory Request=100MiB` and `minLimitPercent=100` for a container, `the value of memory.min is 104857600`. |
+| lowLimitPercent | Int | 0~100 | Unit: %. Default value: `0`. The default value indicates that this parameter is disabled. This parameter specifies the relatively unreclaimable proportion of the memory request of a pod. The amount of relatively unreclaimable memory is calculated based on the following formula: `Value of memory.low = Memory request × Value of lowLimitPercent/100`. For example, if you specify `Memory Request=100MiB` and `lowLimitPercent=100` for a container, `the value of memory.low is 104857600`. |
+| throttlingPercent | Int | 0~100 | Unit: %. Default value: `0`. The default value indicates that this parameter is disabled. This parameter specifies the memory throttling threshold for the ratio of the memory usage of a container to the memory limit of the container. The memory throttling threshold for memory usage is calculated based on the following formula: `Value of memory.high = Memory limit × Value of throttlingPercent/100`. If the memory usage of a container exceeds the memory throttling threshold, the memory used by the container will be reclaimed. This parameter is suitable for container memory overcommitment scenarios. You can use this parameter to prevent cgroups from triggering OOM. For example, if you specify `Memory Limit=100MiB` and `throttlingPercent=80` for a container, `the value of memory.high is 83886080`, which is equal to 80 MiB. |
+| wmarkRatio | Int | 0~100 | Unit: %. Default value: `95`. A value of `0` indicates that this parameter is disabled.
This parameter specifies the threshold of the usage of the memory limit or the value of `memory.high` that triggers asynchronous memory reclaim. If `throttlingPercent` is disabled, the asynchronous memory reclaim threshold for memory usage is calculated based on the following formula: `Value of memory.wmark_high = Memory limit × wmarkRatio/100`. If `throttlingPercent` is enabled, the asynchronous memory reclaim threshold for memory usage is calculated based on the following formula: `Value of memory.wmark_high = Value of memory.high × wmarkRatio/100`. If the usage of the memory limit or the value of memory.high exceeds the threshold, the memcg backend asynchronous reclaim feature is triggered. For example, if you specify `Memory Limit=100MiB`for a container, the memory throttling setting is`memory.high=83886080`, the reclaim ratio setting is `memory.wmark_ratio=95`, and the reclaim threshold setting is `memory.wmark_high=79691776`. | +| wmarkMinAdj | Int | -25~50 | Unit: %. The default value is `-25` for the `LS`/ `LSR` QoS class and `50` for the `BE` QoS class. A value of 0 indicates that this parameter is disabled. This parameter specifies the adjustment to the global minimum watermark for a container. A negative value decreases the global minimum watermark and therefore postpones memory reclaim for the container. A positive value increases the global minimum watermark and therefore antedates memory reclaim for the container. For example, if you create a pod whose QoS class is LS, the default setting of this parameter is `memory.wmark_min_adj=-25`, which indicates that the minimum watermark is decreased by 25% for the containers in the pod. | + +## What Comes Next + +For more details, please refer to our [milestone](https://github.com/koordinator-sh/koordinator/milestones). Hope it +helps! diff --git a/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-05-31-release/index.md b/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-05-31-release/index.md new file mode 100644 index 000000000..b08516909 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-05-31-release/index.md @@ -0,0 +1,207 @@ +--- +slug: release-v0.4.0 +title: What's New in Koordinator v0.4.0? +authors: [joseph] +tags: [release] +--- + +We are happy to announce the release of Koordinator v0.4.0. Koordinator v0.4.0 brings in some notable changes that are most wanted by the community while continuing to expand on experimental features. And in this version, we started to gradually enhance the capabilities of the scheduler. + +## Install or Upgrade to Koordinator v0.4.0 + +### Install with helms + +Koordinator can be simply installed by helm v3.5+, which is a simple command-line tool, and you can get it +from [here](https://github.com/helm/helm/releases). + +```shell +# Firstly add koordinator charts repository if you haven't do this. +$ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/ + +# [Optional] +$ helm repo update + +# Install the latest version. +$ helm install koordinator koordinator-sh/koordinator --version 0.4.0 +``` + +### Upgrade with helm + +```shell +# Firstly add koordinator charts repository if you haven't do this. +$ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/ + +# [Optional] +$ helm repo update + +# Upgrade the latest version. +$ helm upgrade koordinator koordinator-sh/koordinator --version 0.4.0 [--force] +``` + +For more details, please refer to the [installation manual](/docs/installation). 
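If you want to confirm that the install or upgrade took effect before trying the new capabilities below, a quick check like the following is usually enough; it assumes the release name `koordinator` used in the commands above and the default `koordinator-system` namespace that the ConfigMaps later in this post live in.

```shell
# Show the deployed release and the chart version it is running.
$ helm list -A | grep koordinator

# The Koordinator components are expected to be running in the koordinator-system namespace.
$ kubectl get pods -n koordinator-system
```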
+ +## Enhanced node-side scheduling capabilities + +### Custom memory evict threshold + +In the Koordinator v0.2.0, an ability to improve the stability of the node side in the co-location scenario was introduced: [Active eviction mechanism based on memory safety thresholds](/blog/release-v0.2.0#active-eviction-mechanism-based-on-memory-safety-thresholds). The current memory utilization safety threshold default value is 70%, now in the v0.4.0 version, you can modify the `memoryEvictThresholdPercent` with 60% in ConfigMap `slo-controller-config` according to the actual situation: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: koordinator-system +data: + colocation-config: | + { + "enable": true + } + resource-threshold-config: | + { + "clusterStrategy": { + "enable": true, + "memoryEvictThresholdPercent": 60 + } + } +``` + +### BE Pods eviction based on satisfaction + +In order to ensure the runtime quality of different workloads in co-location scenarios, Koordinator uses the CPU Suppress mechanism provided by koordlet on the node side to suppress workloads of the best effort type when the load increases. Or increase the resource quota for best effort type workloads when the load decreases. + +However, it is not suitable if there are many best effort Pods on the node and they are frequently suppressed. Therefore, in version v0.4.0, Koordinator provides an eviction mechanism based on satisfaction of the requests for the best effort Pods. If the best effort Pods are frequently suppressed, the requests of the best effort Pods cannot be satisfied, and the satisfaction is generally less than 1; if the best effort Pods are not suppressed and more CPU resources are obtained when the node resources are idle, then the requests of the best effort Pods can be satisfied, and the satisfaction is greater than or equal to 1. If the satisfaction is less than the specified threshold, and the CPU utilization of the best effort Pods is close to 100%, `koordlet` will evict some best effort Pods to improve the runtime quality of the node. The priority with lower priority or with higher CPU utilization of the same priority is evicted. + +You can modify the ConfigMap `slo-controller-config` according to the actual situation: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: koordinator-system +data: + colocation-config: | + { + "enable": true + } + resource-threshold-config: | + { + "clusterStrategy": { + "enable": true, + "cpuEvictBESatisfactionUpperPercent": 80, + "cpuEvictBESatisfactionLowerPercent": 60 + } + } +``` + +### Group identity + +When latency-sensitive applications and best effort workloads are deployed on the same node, the Linux kernel scheduler must provide more scheduling opportunities to high-priority applications to minimize scheduling latency and the impacts of low-priority workloads on kernel scheduling. For this scenario, Koordinator integrated with the group identity allowing users to configure scheduling priorities to CPU cgroups. + +Alibaba Cloud Linux 2 with a kernel of the kernel-4.19.91-24.al7 version or later supports the group identity feature. The group identity feature relies on a dual red-black tree architecture. A low-priority red-black tree is added based on the red-black tree of the Completely Fair Scheduler (CFS) scheduling queue to store low-priority workloads. 
When the kernel schedules the workloads that have identities, the kernel processes the workloads based on their priorities. For more details, please refer to the [doc](https://www.alibabacloud.com/help/en/elastic-compute-service/latest/group-identity-feature). + +Koordinator defines group identity default values for Pods of different QoS types: + +| QoS | Default Value | +|-----|---------------| +| LSR | 2 | +| LS | 2 | +| BE | -1 | + +You can modify the ConfigMap `slo-controller-config` to set group identity values according to the actual situation: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: koordinator-system +data: + colocation-config: | + { + "enable": true + } + resource-qos-config: | + { + "clusterStrategy": { + "lsrClass": { + "cpuQOS": { + "enable": true, + "groupIdentity": 2 + } + }, + "lsClass": { + "cpuQOS": { + "enable": true, + "groupIdentity": 2 + } + }, + "beClass": { + "cpuQOS": { + "enable": true, + "groupIdentity": -1 + } + }, + "systemClass": { + "cpuQOS": { + "enable": true, + "groupIdentity": 2 + } + } + } + } +``` + +To enable this feature, you need to update the kernel and configuration file, then install the new component `koord-runtime-proxy` of koordinator. + +## koord-runtime-proxy (experimental) + +`koord-runtime-proxy` acts as a proxy between kubelet and containerd(dockerd under dockershim scenario), which is designed to intercept CRI request, and apply some resource management policies, such as setting different cgroup parameters by pod priorities under hybrid workload orchestration scenario, applying new isolation policies for latest Linux kernel, CPU architecture, and etc. + +There are two components involved, koord-runtime-proxy and RuntimePlugins. + +![image](/img/koord-runtime-proxy-architecture.svg) + +### koord-runtime-proxy +koord-runtime-proxy is in charge of intercepting request during pod's lifecycle, such as RunPodSandbox, CreateContainer etc., and then calling RuntimePlugins to do resource isolation policies before transferring request to backend containerd(dockerd) and after transferring response to kubelet. koord-runtime-proxy provides an isolation-policy-execution framework which allows customized plugins registered to do specified isolation policies, these plugins are called RuntimePlugins. koord-runtime-proxy itself does NOT do any isolation policies. + +### RuntimePlugins +RuntimePlugins register events(RunPodSandbox etc.) to koord-runtime-proxy and would receive notifications when events happen. RuntimePlugins should complete resource isolation policies basing on the notification message, and then response koord-runtime-proxy, koord-runtime-proxy would decide to transfer request to backend containerd or discard request according to plugins' response. + +If no RuntimePlugins registered, koord-runtime-proxy would become a transparent proxy between kubelet and containerd. + +For more details, please refer to the [design doc](https://github.com/koordinator-sh/koordinator/blob/main/docs/design-archive/runtime-manager-design-doc.md). + +### Installation + +When installing koord-runtime-proxy, you need to change the startup parameters of the kubelet, set the CRI parameters to point to the koord-runtime-proxy, and configure the CRI parameters of the corresponding container runtime when installing the koord-runtime-proxy. + +koord-runtime-proxy is in the Alpha experimental version stage. Currently, it provides a minimum set of extension points. At the same time, there may be some bugs. 
You are welcome to try it and give feedback. + +For detailed installation process, please refer to the [manual](/docs/installation#install-koord-runtime-proxy-experimental). + +## Load-Aware Scheduling + +Although Koordinator provides the co-location mechanism to improve the resource utilization of the cluster and reduce costs, it does not yet have the ability to control the utilization level of the cluster dimension, Best Effort workloads may also interfere with latency-sensitive applications. Load-aware scheduling plugin helps Koordinator to achieve this capability. + +The scheduling plugin filters abnormal nodes and scores them according to resource usage. This scheduling plugin extends the Filter/Score/Reserve/Unreserve extension points defined in the Kubernetes scheduling framework. + +By default, abnormal nodes are filtered, and users can decide whether to enable or not by configuring as needed. +- Filter nodes where koordlet fails to update NodeMetric. +- Filter nodes by utilization thresholds. If the configuration enables, the plugin will exclude nodes with *latestUsageUtilization >= utilizationThreshold*. + +This plugin is dependent on NodeMetric's reporting period. Different reporting periods need to be set according to different scenarios and workloads. Therefore, NodeMetricSpec has been extended to support user-defined reporting period and aggregation period. Users can modify `slo-controller-config` to complete the corresponding configuration, and the controller in `koord-manager` will be responsible for updating the reporting period and aggregation period fields of NodeMetrics of related nodes. + +Currently, the resource utilization thresholds of nodes are configured based on experience to ensure the runtime quality of nodes. But there are also ways to evaluate the workload running on the node to arrive at a more appropriate threshold for resource utilization. For example, in a time-sharing scenario, a higher threshold can be set to allow scheduling to run more best effort workloads during the valley of latency-sensitive applications. When the peak of latency-sensitive applications comes up, lower the threshold and evict some best effort workloads. In addition, 3-sigma can be used to analyze the utilization level in the cluster to obtain a more appropriate threshold. + +The core logic of the scoring algorithm is to select the node with the smallest resource usage. However, considering the delay of resource usage reporting and the delay of Pod startup time, the resource requests of the Pods that have been scheduled and the Pods currently being scheduled within the time window will also be estimated, and the estimated values will be involved in the calculation. + +At present, Koordinator does not have the ability to profile workloads. Different types of workloads have different ways of building profiles. For example, long-running pods need to be scheduled with long-period profiling, while short-period pods should be scheduled with short-period profiling. + +For more details, please refer to the [proposal](https://github.com/koordinator-sh/koordinator/blob/main/docs/proposals/scheduling/20220510-load-aware-scheduling.md). + +## What Comes Next + +For more details, please refer to our [milestone](https://github.com/koordinator-sh/koordinator/milestones). Hope it +helps! 
diff --git a/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-06-30-release/index.md b/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-06-30-release/index.md new file mode 100644 index 000000000..a9c488648 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-06-30-release/index.md @@ -0,0 +1,220 @@ +--- +slug: release-v0.5.0 +title: "Koordinator v0.5: Now With Node Resource Topology And More" +authors: [jason] +tags: [release] +--- + +In addition to the usual updates to supporting utilities, Koordinator v0.5 adds a couple of new useful features we think +you'll like. + +## Install or Upgrade to Koordinator v0.5.0 + +### Install with helms + +Koordinator can be simply installed by helm v3.5+, which is a simple command-line tool, and you can get it +from [here](https://github.com/helm/helm/releases). + +```shell +# Firstly add koordinator charts repository if you haven't do this. +$ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/ + +# [Optional] +$ helm repo update + +# Install the latest version. +$ helm install koordinator koordinator-sh/koordinator --version 0.5.0 +``` + +### Upgrade with helm + +```shell +# Firstly add koordinator charts repository if you haven't do this. +$ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/ + +# [Optional] +$ helm repo update + +# Upgrade the latest version. +$ helm upgrade koordinator koordinator-sh/koordinator --version 0.5.0 [--force] +``` + +For more details, please refer to the [installation manual](/docs/installation). + +## Fine-grained CPU Orchestration + +In this version, we introduced a fine-grained CPU orchestration. Pods in the Kubernetes cluster may interfere with +others' running when they share the same physical resources and both demand many resources. The sharing of CPU resources +is almost inevitable. e.g. SMT threads (i.e. logical processors) share execution units of the same core, and cores in +the same chip share one last-level cache. The resource contention can slow down the running of these CPU-sensitive +workloads, resulting in high response latency (RT). + +To improve the performance of CPU-sensitive workloads, koord-scheduler provides a mechanism of fine-grained CPU +orchestration. It enhances the CPU management of Kubernetes and supports detailed NUMA-locality and CPU exclusions. + +Please check out our [user manual](/docs/user-manuals/fine-grained-cpu-orchestration) for a detailed introduction and +tutorial. + +## Resource Reservation + +Pods are fundamental units for allocating node resources in Kubernetes, which bind resource requirements with business +logic. The scheduler is not able to reserve node resources for specific pods or workloads. We may try using a fake pod +to prepare resources by the preemption mechanism. However, fake pods can be preempted by any scheduled pods with higher +priorities, which make resources get scrambled unexpectedly. + +In Koordinator, a resource reservation mechanism is proposed to enhance scheduling and especially benefits scenarios +below: + +1. Preemption: Existing preemption does not guarantee that only preempting pods can allocate preempted resources. With a + reservation, the scheduler should be able to "lock" resources preventing from allocation of other pods with the same + or + higher priority. +2. De-scheduling: For the descheduler, it is better to ensure sufficient resources with the reservation before pods get + rescheduled. 
Otherwise, rescheduled pods may not be runnable anymore and make the belonging application disrupted. +3. Horizontal scaling: Using reservation to achieve more deterministic horizontal scaling. e.g. Submit a reservation and + make sure it is available before scaling up replicas. +4. Resource Pre-allocation: Sometimes we want to pre-allocate node resources for future resource demands even if the + resources are not currently allocatable. Reservation can help with this and it should make no physical cost. + +This feature is still under development. We've finalized the API, feel free to check it out. + +``` +type Reservation struct { + metav1.TypeMeta `json:",inline"` + // A Reservation object is non-namespaced. + // It can reserve resources for pods of any namespace. Any affinity/anti-affinity of reservation scheduling can be + // specified in the pod template. + metav1.ObjectMeta `json:"metadata,omitempty"` + Spec ReservationSpec `json:"spec,omitempty"` + Status ReservationStatus `json:"status,omitempty"` +} + +type ReservationSpec struct { + // Template defines the scheduling requirements (resources, affinities, images, ...) processed by the scheduler just + // like a normal pod. + // If the `template.spec.nodeName` is specified, the scheduler will not choose another node but reserve resources on + // the specified node. + Template *corev1.PodTemplateSpec `json:"template,omitempty"` + // Specify the owners who can allocate the reserved resources. + // Multiple owner selectors and ANDed. + Owners []ReservationOwner `json:"owners,omitempty"` + // By default, the resources requirements of reservation (specified in `template.spec`) is filtered by whether the + // node has sufficient free resources (i.e. ReservationRequest < NodeFree). + // When `preAllocation` is set, the scheduler will skip this validation and allow overcommitment. The scheduled + // reservation would be waiting to be available until free resources are sufficient. + PreAllocation bool `json:"preAllocation,omitempty"` + // Time-to-Live period for the reservation. + // `expires` and `ttl` are mutually exclusive. If both `ttl` and `expires` are not specified, a very + // long TTL will be picked as default. + TTL *metav1.Duration `json:"ttl,omitempty"` + // Expired timestamp when the reservation expires. + // `expires` and `ttl` are mutually exclusive. Defaults to being set dynamically at runtime based on the `ttl`. + Expires *metav1.Time `json:"expires,omitempty"` +} + +type ReservationStatus struct { + // The `phase` indicates whether is reservation is waiting for process (`Pending`), available to allocate + // (`Available`) or expired to get cleanup (Expired). + Phase ReservationPhase `json:"phase,omitempty"` + // The `conditions` indicate the messages of reason why the reservation is still pending. + Conditions []ReservationCondition `json:"conditions,omitempty"` + // Current resource owners which allocated the reservation resources. + CurrentOwners []corev1.ObjectReference `json:"currentOwners,omitempty"` +} + +type ReservationOwner struct { + // Multiple field selectors are ORed. + Object *corev1.ObjectReference `json:"object,omitempty"` + Controller *ReservationControllerReference `json:"controller,omitempty"` + LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty"` +} + +type ReservationControllerReference struct { + // Extend with a `namespace` field for reference different namespaces. 
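    // (metav1.OwnerReference itself carries no namespace, which is why the embedded
    // reference below is paired with an explicit Namespace field.)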
+ metav1.OwnerReference `json:",inline"` + Namespace string `json:"namespace,omitempty"` +} + +type ReservationPhase string + +const ( + // ReservationPending indicates the Reservation has not been processed by the scheduler or is unschedulable for + // some reasons (e.g. the resource requirements cannot get satisfied). + ReservationPending ReservationPhase = "Pending" + // ReservationAvailable indicates the Reservation is both scheduled and available for allocation. + ReservationAvailable ReservationPhase = "Available" + // ReservationWaiting indicates the Reservation is scheduled, but the resources to reserve are not ready for + // allocation (e.g. in pre-allocation for running pods). + ReservationWaiting ReservationPhase = "Waiting" + // ReservationExpired indicates the Reservation is expired, which the object is not available to allocate and will + // get cleaned in the future. + ReservationExpired ReservationPhase = "Expired" +) + +type ReservationCondition struct { + LastProbeTime metav1.Time `json:"lastProbeTime"` + LastTransitionTime metav1.Time `json:"lastTransitionTime"` + Reason string `json:"reason"` + Message string `json:"message"` +} +``` + +## QoS Manager + +Currently, plugins from resmanager in Koordlet are mixed together, they should be classified into two +categories: `static` and `dynamic`. Static plugins will be called and run only once when a container created, updated, +started or stopped. However, for dynamic plugins, they may be called and run at any time according the real-time runtime +states of node, such as CPU suppress, CPU burst, etc. This proposal only focuses on refactoring dynamic plugins. Take a +look at current plugin implementation, there are many function calls to resmanager's methods directly, such as +collecting node/pod/container metrics, fetching metadata of node/pod/container, fetching configurations(NodeSLO, etc.). +In the feature, we may need a flexible and powerful framework with scalability for special external plugins. + +The below is directory tree of qos-manager inside koordlet, all existing dynamic plugins(as built-in plugins) will be +moved into sub-directory `plugins`. + +``` +pkg/koordlet/qosmanager/ + - manager.go + - context.go // plugin context + - /plugins/ // built-in plugins + - /cpubrust/ + - /cpusuppress/ + - /cpuevict/ + - /memoryevict/ +``` + +We only have the proposal in this version. Stay tuned, further implementation is coming soon! + +## Multiple Running Hook Modes + +`Runtime Hooks` includes a set of plugins which are responsible for the injections of resource isolation parameters +by pod attribute. When `Koord Runtime Proxy` running as a CRI Proxy, `Runtime Hooks` acts as the backend server. The +mechanism of CRI Proxy can ensure the consistency of resource parameters during pod lifecycle. However, +`Koord Runtime Proxy` can only hijack CRI requests from kubelet for pods, the consistency of resource parameters in +QoS class directory cannot be guaranteed. Besides, modification of pod parameters from third-party(e.g. manually) will +also break the correctness of hook plugins. + +Therefore, a standalone running mode with reconciler for `Runtime Hooks` is necessary. Under `Standalone` running +mode, resource isolation parameters will be injected asynchronously, keeping eventual consistency of the injected +parameters for pod and QoS class even without `Runtime Hook Manager`. + +## Some minor works + +1. 
We fix the backward compatibility issues reported by our users + in [here](https://github.com/koordinator-sh/koordinator/issues/310). If you've ever encountered similar problem, + please upgrade to the latest version. +2. Two more interfaces were added into runtime-proxy. One is `PreCreateContainerHook`, which could set container + resources setting before creating, and the other is `PostStopSandboxHook`, which could do the resource setting + garbage collecting before pods deleted. +3. `cpuacct.usage` is more precise than `cpuacct.stat`, and `cpuacct.stat` is in USER_HZ unit, while `cpuacct.usage` is + nanoseconds. After thorough discussion, we were on the same page that we replace `cpuacct.stat` with `cpuacct.usage` + in koordlet. +4. Koordlet needs to keep fetching data from kubelet. Before this version, we only support accessing kubelet via + read-only port over HTTP. Due to security concern, we've enabled HTTPS access in this version. For more details, + please refer to this [PR](https://github.com/koordinator-sh/koordinator/pull/320). + +## What’s coming next in Koordinator + +Don't forget that Koordinator is developed in the open. You can check out our Github milestone to know more about what +is happening and what we have planned. For more details, please refer to +our [milestone](https://github.com/koordinator-sh/koordinator/milestones). Hope it helps! \ No newline at end of file diff --git a/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-08-04-release/index.md b/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-08-04-release/index.md new file mode 100644 index 000000000..c3eac6531 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-08-04-release/index.md @@ -0,0 +1,166 @@ +--- +slug: release-v0.6.0 +title: "Koordinator v0.6: Complete fine-grained CPU orchestration, Resource Reservation and Descheduling" +authors: [joseph] +tags: [release] +--- + +We are happy to announce the release of Koordinator v0.6.0. Koordinator v0.6.0 brings complete Fine-grained CPU Orchestration, Resource Reservation mechanism, safely Pod Migration mechanism and Descheduling Framework. + +## Install or Upgrade to Koordinator v0.6.0 + +### Install with helms + +Koordinator can be simply installed by helm v3.5+, which is a simple command-line tool, and you can get it +from [here](https://github.com/helm/helm/releases). + +```shell +# Firstly add koordinator charts repository if you haven't do this. +$ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/ + +# [Optional] +$ helm repo update + +# Install the latest version. +$ helm install koordinator koordinator-sh/koordinator --version 0.6.0 +``` + +### Upgrade with helm + +```shell +# Firstly add koordinator charts repository if you haven't do this. +$ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/ + +# [Optional] +$ helm repo update + +# Upgrade the latest version. +$ helm upgrade koordinator koordinator-sh/koordinator --version 0.6.0 [--force] +``` + +For more details, please refer to the [installation manual](/docs/installation). + +## Fine-grained CPU Orchestration + +In Koordinator v0.5.0, we designed and implemented basic CPU orchestration capabilities. The koord-scheduler supports different CPU bind policies to help LSE/LSR Pods achieve better performance. 
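As a reminder of what such a workload looks like, here is a minimal sketch of an LSR pod. The `koordinator.sh/qosClass` label and the `koord-scheduler` scheduler name follow the conventions from the earlier releases; the whole-number CPU request and limit are our assumption about what makes the pod eligible for exclusive core binding.

```yaml
# A minimal, illustrative LSR pod. The label and scheduler name reuse conventions shown
# in earlier posts; the integer CPU request/limit is an assumption about what enables
# exclusive core binding rather than a verified requirement.
apiVersion: v1
kind: Pod
metadata:
  name: lsr-demo
  labels:
    koordinator.sh/qosClass: LSR
spec:
  schedulerName: koord-scheduler
  containers:
    - name: app
      image: nginx
      resources:
        requests:
          cpu: "4"
          memory: 8Gi
        limits:
          cpu: "4"
          memory: 8Gi
```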
+ +Now in the v0.6 version, we have basically completed the CPU orchestration capabilities originally designed, such as: +- Support default CPU bind policy configured by koord-scheduler for LSR/LSE Pods that do not specify a CPU bind policy +- Support CPU exclusive policy that supports `PCPULevel` and `NUMANodeLevel`, which can spread the CPU-bound Pods to different physical cores or NUMA Nodes as much as possible to reduce the interference between Pods. +- Support Node CPU Orchestration API to helper cluster administrators control the CPU orchestration behavior of nodes. The label `node.koordinator.sh/cpu-bind-policy` constrains how to bind CPU logical CPUs when scheduling. If set with the `FullPCPUsOnly` that requires that the scheduler must allocate full physical cores. Equivalent to kubelet CPU manager policy option `full-pcpus-only=true`. If there is no `node.koordinator.sh/cpu-bind-policy` in the node's label, it will be executed according to the policy configured by the Pod or koord-scheduler. The label `node.koordinator.sh/numa-allocate-strategy` indicates how to choose satisfied NUMA Nodes when scheduling. Support `MostAllocated` and `LeastAllocated`. +- koordlet supports the LSE Pods and improve compatibility with existing Guaranteed Pods with static CPU Manager policy. + + +Please check out our [user manual](/docs/user-manuals/fine-grained-cpu-orchestration) for a detailed introduction and +tutorial. + +## Resource Reservation + +We completed the `Resource Reservation API` design proposal in v0.5, and implemented the basic Reservation mechanism in the current v0.6 version. + +When you want to use the Reservation mechanism to reserve resources, you do not need to modify the Pod or the existing workloads(e.g. Deployment, StatefulSet). koord-scheduler provides a simple to use API named `Reservation`, which allows us to reserve node resources for specified pods or workloads even if they haven't get created yet. You only need to write the Pod Template and the owner information in the ReservationSpec when creating a Reservation. When koord-scheduler perceives a new Reservation object, it will allocate resources to the Reservation object through the normal Pod scheduling process. After scheduling, koord-scheduler will update the success or failure information to ResourceStatus. If the reservation is successful, and the OwnerReference or Labels of the newly created Pod satisfy the owner information declared earlier, then the newly created Pod will directly reuse the resources held by the Reservation. When the Pod is destroyed, the Reservation object can be reused until the Reservation expires. + +![image](/img/resource-reservation.svg) + +The resource reservation mechanism can help solve or optimize the problems in the following scenarios: + +1. Preemption: Existing preemption does not guarantee that only preempting pods can allocate preempted resources. With a + reservation, the scheduler should be able to "lock" resources preventing from allocation of other pods with the same + or higher priority. +2. Descheduling: For the descheduler, it is better to ensure sufficient resources with the reservation before pods get + rescheduled. Otherwise, rescheduled pods may not be runnable anymore and make the belonging application disrupted. +3. Horizontal scaling: Using reservation to achieve more deterministic horizontal scaling. e.g. Submit a reservation and + make sure it is available before scaling up replicas. +4. 
Resource Pre-allocation: Sometimes we want to pre-allocate node resources for future resource demands even if the + resources are not currently allocatable. Reservation can help with this and it should make no physical cost. + +- Please check out our [user manual](/docs/user-manuals/resource-reservation) for a detailed introduction and +tutorial. +- For more information, please see [Design: Resource Reservation](/docs/designs/resource-reservation) + +## Pod Migration Job + +Migrating Pods is an important capability that many components (such as descheduler) rely on, and can be used to optimize scheduling or help resolve workload runtime quality issues. We believe that pod migration is a complex process, involving steps such as auditing, resource allocation, and application startup, and is mixed with application upgrading, scaling scenarios, resource operation and maintenance operations by cluster administrators. Therefore, how to manage the stability risk of this process to ensure that the application does not fail due to the migration of Pods is a very critical issue that must be resolved. + +The descheduler in the K8s community evicts pods according to different strategies. However, it does not guarantee whether the evicted Pod has resources available after re-creation. If a large number of newly created Pods are in the Pending state when the resources in the cluster are tight, may lower the application availabilities. + +Koordinator defines a CRD-based Migration/Eviction API named `PodMigrationAPI`, through which the descheduler or other components can evict or delete Pods more safely. With PodMigrationJob we can track the status of each process in the migration, and perceive scenarios such as upgrading and scaling of the application. + +It's simple to use the PodMigrationJob API. Create a `PodMigrationJob` with the YAML file below to migrate `pod-demo-0`. + +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: PodMigrationJob +metadata: + name: migrationjob-demo +spec: + paused: false + ttl: 5m + mode: ReservationFirst + podRef: + namespace: default + name: pod-demo-5f9b977566-c7lvk +status: + phase: Pending +``` + +```bash +$ kubectl create -f migrationjob-demo.yaml +podmigrationjob.scheduling.koordinator.sh/migrationjob-demo created +``` + +Then you can query the migration status and query the migration events + +```bash +$ kubectl get podmigrationjob migrationjob-demo +NAME PHASE STATUS AGE NODE RESERVATION PODNAMESPACE POD NEWPOD TTL +migrationjob-demo Succeed Complete 37s node-1 d56659ab-ba16-47a2-821d-22d6ba49258e default pod-demo-5f9b977566-c7lvk pod-demo-5f9b977566-nxjdf 5m0s + +$ kubectl describe podmigrationjob migrationjob-demo +... +Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Normal ReservationCreated 8m33s koord-descheduler Successfully create Reservation "d56659ab-ba16-47a2-821d-22d6ba49258e" + Normal ReservationScheduled 8m33s koord-descheduler Assigned Reservation "d56659ab-ba16-47a2-821d-22d6ba49258e" to node "node-1" + Normal Evicting 8m33s koord-descheduler Try to evict Pod "default/pod-demo-5f9b977566-c7lvk" + Normal EvictComplete 8m koord-descheduler Pod "default/pod-demo-5f9b977566-c7lvk" has been evicted + Normal Complete 8m koord-descheduler Bind Pod "default/pod-demo-5f9b977566-nxjdf" in Reservation "d56659ab-ba16-47a2-821d-22d6ba49258e" +``` + +- Please check out our [user manual](/docs/user-manuals/pod-migration-job) for a detailed introduction and +tutorial. 
+- For more information, please see [Design: PodMigrationJob](/docs/designs/pod-migration-job).
+
+## Descheduling Framework
+
+We implemented a brand-new Descheduling Framework in v0.6.
+
+The existing descheduler in the community can solve some problems, but we think there are still many aspects that can be improved. For example, it only supports a periodic execution mode and does not support an event-triggered mode; it is not possible to extend and configure custom descheduling strategies without invading the existing descheduler code, as can be done with kube-scheduler; and it does not support implementing a custom evictor.
+
+We also noticed that the K8s descheduler community found these problems and proposed corresponding solutions, such as the [#753 Descheduler framework Proposal](https://github.com/kubernetes-sigs/descheduler/issues/753) and [PoC #781](https://github.com/kubernetes-sigs/descheduler/pull/781). The K8s descheduler community is trying to implement a descheduler framework similar to the K8s scheduling framework, which coincides with our thinking.
+
+Overall, these solutions solve most of our problems, but we also noticed that the related implementations have not been merged into the main branch. We reviewed these implementations and discussions, and we believe this is the right direction. Considering that Koordinator has clear milestones for descheduler-related features, we will implement Koordinator's own descheduler independently of the upstream community. We try to reuse some of the designs in the community proposal [#753](https://github.com/kubernetes-sigs/descheduler/issues/753), and we will follow Koordinator's compatibility principle with K8s to stay compatible with the upstream community descheduler in our implementation. Such an independent implementation can also drive the evolution of the upstream community's work on the descheduler framework, and when the upstream community has new changes or switches to an architecture that Koordinator deems appropriate, Koordinator will follow up promptly and actively.
+
+Based on this descheduling framework, it is easy to stay compatible with the existing descheduling strategies in the K8s community, and users can implement and integrate their own descheduling plugins as easily as with the K8s Scheduling Framework. Users can also implement Controllers in the form of plugins to realize event-driven descheduling scenarios. In addition, the framework integrates the `MigrationController` based on the PodMigrationJob API, which serves as the default Evictor plugin to help safely migrate Pods in various descheduling scenarios.
+
+At present, we have implemented the main body of the framework, including the MigrationController based on PodMigrationJob, and it is available as a whole. We also provide [a demo descheduling plugin](https://github.com/koordinator-sh/koordinator/blob/main/pkg/descheduler/framework/plugins/removepodsviolatingnodeaffinity/node_affinity.go). In the future, we will migrate and stay compatible with the existing descheduling policies of the community, as well as provide a load-balancing descheduling plugin for co-location scenarios.
+
+The current framework is still in an early stage of rapid evolution, and there are still many details that need to be improved. Everyone who is interested is welcome to participate in building it together. We hope that more people can implement the descheduling capabilities they need more easily and with more confidence.
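+
+As a rough sketch of how these pieces fit together, the configuration below enables the `MigrationController` as the evictor and the demo `RemovePodsViolatingNodeAffinity` plugin as a deschedule plugin. The API group/version and field layout follow the `DeschedulerConfiguration` example shown in a later release note in this repository, so they may not match the exact v0.6 configuration; treat it as illustrative only:
+
+```yaml
+apiVersion: descheduler/v1alpha2
+kind: DeschedulerConfiguration
+deschedulingInterval: 10m                          # how often the deschedule plugins run (illustrative value)
+profiles:
+  - name: koord-descheduler
+    plugins:
+      evict:
+        enabled:
+          - name: MigrationController             # default evictor: migrates Pods via PodMigrationJob
+      deschedule:
+        enabled:
+          - name: RemovePodsViolatingNodeAffinity # the demo plugin mentioned above
+```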
+ +- For more information, please see [Design: descheduling framework](/docs/designs/descheduler-framework). +- For specific implementation, please see [pkg/descheduler](https://github.com/koordinator-sh/koordinator/tree/main/pkg/descheduler). + +## About GPU Scheduling + +There are also some new developments in GPU scheduling capabilities that everyone cares about. + +During the iteration of v0.6, we completed the design of [GPU Share Scheduling](https://github.com/koordinator-sh/koordinator/blob/main/docs/proposals/scheduling/20220629-fine-grained-device-scheduling.md), and also completed the design of [Gang Scheduling](https://github.com/koordinator-sh/koordinator/blob/main/docs/proposals/scheduling/20220701-schedule-gang.md). Development of these capabilities is ongoing and will be released in v0.7. + +In addition, in order to explore the mechanism of GPU overcommitment, we have implemented the ability to [report GPU Metric](https://github.com/koordinator-sh/koordinator/pull/361) in v0.6. + +## What’s coming next in Koordinator + +Don't forget that Koordinator is developed in the open. You can check out our Github milestone to know more about what +is happening and what we have planned. For more details, please refer to +our [milestone](https://github.com/koordinator-sh/koordinator/milestones). Hope it helps! \ No newline at end of file diff --git a/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-09-23-release/index.md b/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-09-23-release/index.md new file mode 100644 index 000000000..81fac9c8e --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-09-23-release/index.md @@ -0,0 +1,373 @@ +--- +slug: release-v0.7.0 +title: "Koordinator v0.7: 为任务调度领域注入新活力" +authors: [joseph] +tags: [release] +--- + +[Koordinator[1]](https://koordinator.sh/) 继上次 [v0.6版本[2]](https://mp.weixin.qq.com/s/YdoxVxz_91ZFemF8JuxRvQ) 发布后,经过 Koordinator 社区的努力,我们迎来了具有重大意义的 v0.7 版本。在这个版本中着重解决机器学习、大数据场景需要的任务调度能力,例如 CoScheduling、ElasticQuota和精细化的 GPU 共享调度能力。并在调度问题诊断分析方面得到了增强,重调度器也极大的提升了安全性,降低了重调度的风险。 + +# 版本功能特性解读 + +## 1. 任务调度 + +### 1.1 Enhanced Coscheduling + +Gang scheduling是在并发系统中将多个相关联的进程调度到不同处理器上同时运行的策略,其最主要的原则是保证所有相关联的进程能够同时启动,防止部分进程的异常,导致整个关联进程组的阻塞。例如当提交一个Job时会产生多个任务,这些任务期望要么全部调度成功,要么全部失败。这种需求称为 All-or-Nothing,对应的实现被称作 Gang Scheduling(or Coscheduling) 。
Koordinator 在启动之初,期望支持 Kubernetes 多种工作负载的混部调度,提高工作负载的运行时效率和可靠性,其中就包括了机器学习和大数据领域中广泛存在的具备 All-or-Nothing 需求的作业负载。 为了解决 All-or-Nothing 调度需求,Koordinator v0.7.0 基于社区已有的 Coscheduling 实现了 Enhanced Coscheduling。
Enhanced Coscheduling 秉承着 Koordinator 兼容社区的原则,完全兼容社区 Coscheduling 和依赖的 PodGroup CRD。已经使用 PodGroup 的用户可以无缝升级到 Koordinator。
除此之外,Enhanced Coscheduling 还实现了如下增强能力: + +#### 支持 `Strict` 和 `NonStrict` 两种模式 + +两种模式的区别在于 `Strict`模式(即默认模式)下调度失败会 Reject 所有分配到资源并处于 Wait 状态的 Pod,而 `NonStrict` 模式不会发起 Reject。NonStrict 模式下,同属于一个 PodGroup 的 Pod A 和 PodB 调度时,如果 PodA 调度失败不会影响 PodB 调度, PodB 还会继续被调度。NonStrict 模式对于体量较大的 Job 比较友好,可以让这种大体量 Job 更快的调度完成,但同时也增加了资源死锁的风险。后续 Koordinator 会提供 NonStrict 模式下解决死锁的方案实现。
用户在使用时,可以在 PodGroup 或者 Pod 中追加 annotation `gang.scheduling.koordinator.sh/mode=NonStrict`开启 NonStrict 模式。 + +#### 改进 PodGroup 调度失败的处理机制,实现更高效的重试调度 + +举个例子,PodGroup A 关联了5个Pod,其中前3个Pod通过Filter/Score,进入Wait阶段,第4个Pod调度失败,当调度第5个Pod时,发现第4个Pod已经失败,则拒绝调度。在社区 Coscheduling 实现中,调度失败的PodGroup 会加入到基于cache机制的 lastDeniedPG 对象中,当 cache 没有过期,则会拒绝调度;如果过期就允许继续调度。可以看到 cache 的过期时间很关键,过期时间设置的过长会导致Pod迟迟得不到调度机会,设置的过短会出现频繁的无效调度。
而在Enhanced Coscheduling 中,实现了一种基于 ScheduleCycle 的重试机制。以上场景为例,5个Pod的 ScheduleCycle 初始值为 0,PodGroup 对应的 ScheduleCycle 初始值为1;当每一次尝试调度 Pod 时,都会更新 Pod ScheduleCycle 为 PodGroup ScheduleCycle。如果其中一个 Pod 调度失败,会标记当前的 PodGroup ScheduleCycle 无效,之后所有小于 PodGroup ScheduleCycle 的 Pod 都会被拒绝调度。当同一个 PodGroup 下的所有 Pod 都尝试调度一轮后,Pod ScheduleCycle 都更新为当前 PodGroup ScheduleCycle,并递进 PodGroup ScheduleCycle,并标记允许调度。这种方式可以有效规避基于过期时间的缺陷,完全取决于调度队列的配置重试调度。
![image.png](/img/gang-schedulue-cycle.png "基于 ScheduleCycle 的重试机制") + +#### 支持多个 PodGroup 为一组完成 Gang Scheduling + +一些复杂的 Job 有多种角色,每个角色管理一批任务,每个角色的任务要求支持 All-or-Nothing ,每个角色的 MinMember 要求也不一样,并且每个角色之间也要求 All-or-Nothing。这就导致每个角色都有一个对应的 PodGroup ,并且还要求 PodGroup 即使满足了也需要等待其他角色的 PodGroup 必须满足。社区 Coscheduling 无法满足这种场景需求。而 Koordinator 实现的 Enhanced Coscheduling 支持用户在多个 PodGroup 中增加 anntation 相关关联实现,并支持跨Namespace。例如用户有2个PodGroup ,名字分别是PodGroupA和PodGroupB,可以按照如下例子关联两个 PodGroup: +```yaml +apiVersion: v1alpha1 +kind: PodGroup +metadata: + name: podGroupA + namespace: default + annotations: + gang.scheduling.koordinator.sh/groups: ["namespaceA/podGroupA", "namespaceB/podGroupB"] +spec: + ... +``` + +#### 支持轻量化 Gang 协议 + +如果用户不希望创建 PodGroup,认为创建 PodGroup 太繁琐,那么可以考虑在一组 Pod 中填充相同 annotation `gang.scheduling.koordinator.sh/name=` 表示这一组 Pod 使用 Coscheduling 调度。如果期望设置 minMember ,可以追加 Annotation `gang.scheduling.koordinator.sh/min-available=`。举个例子: +```yaml +apiVersion: v1 +kind: Pod +metadata: + annotations: + gang.scheduling.koordinator.sh/name: "pod-group-a" + gang.scheduling.koordinator.sh/min-available: "5" + name: demo-pod + namespace: default +spec: + ... +``` + +### 1.2 ElasticQuota Scheduling + +一家中大型公司内有多个产品和研发团队,共用多个比较大规模的 Kubernetes 集群,这些集群内含有的大量 CPU/Memory/Disk 等资源被资源运营团队统一管理。运营团队往往在采购资源前,通过额度预算的机制让公司内每个团队根据自身的需求提交额度预算。业务团队此时一般根据业务当前和对未来的预期做好额度预算。最理想的情况是每一份额度都能够被使用,但现实告诉我们这是不现实的。往往出现的问题是: + +1. 团队 A 高估了业务的发展速度,申请了太多的额度用不完 +2. 团队 B 低估了业务的发展速度,申请的额度不够用 +3. 团队 C 安排了一场活动,手上的额度不够多了,但是活动只持续几周,申请太多额度和资源也会浪费掉。 +4. 团队 D 下面还有各个子团队和业务,每个子团队内也会出现类似A B C 三个团队的情况,而且其中有些团队的业务临时突发需要提交一些计算任务要交个客户,但是没有额度了,走额度预算审批也不够了。 +5. ...... + +以上大家日常经常遇到的场景,在混部场景、大数据场景,临时性突发需求又是时常出现的,这些资源的需求都给额度管理工作带来了极大的挑战。做好额度管理工作,一方面避免过度采购资源降低成本,又要在临时需要额度时不采购资源或者尽量少的采购资源;另一方面不能因为额度问题限制资源使用率,额度管理不好就会导致即使有比较好的技术帮助复用资源,也无法发挥其价值。 总之,额度管理工作是广大公司或组织需长期面对且必须面对的问题。
Kubernetes ResourceQuota 可以解决额度管理的部分问题。 原生 Kubernetes ResourceQuota API 用于指定每个 Namespace 的最大资源额度量,并通过 admission 机制完成准入检查。如果 Namespace 当前资源分配总量超过ResourceQuota 指定的配额,则拒绝创建 Pod。 Kubernetes ResourceQuota 设计有一个局限性:Quota 用量是按照 Pod Requests 聚合的。 虽然这种机制可以保证实际的资源消耗永远不会超过 ResourceQuota 的限制,但它可能会导致资源利用率低,因为一些 Pod 可能已经申请了资源但未能调度。
Kubernetes Scheduler-SIG 后来给出了一个借鉴 YARN Capacity Scheduling、称作 ElasticQuota 的设计方案,并给出了具体的实现,允许用户设置 max 和 min:
+
+- max 表示用户可以消费的资源上限
+- min 表示需要保障用户实现基本功能/性能所需要的最小资源量
+
+通过这两个参数可以帮助用户实现如下的需求:
+
+1. 用户设置 min < max 时,当有突发资源需求时,即使当前 ElasticQuota 的总用量超过了 min,但只要没有达到 max,那么用户可以继续创建新的 Pod 应对新的任务请求。
+2. 当用户需要更多资源时,可以从其他 ElasticQuota 中“借用(borrow)”那些已被保障(min)但还没有被使用的资源。
+3. 当一个 ElasticQuota 需要使用 min 资源时,会通过抢占机制从其他借用方抢回来,即驱逐一些其他 ElasticQuota 超过 min 用量的 Pod。
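+
+为了更直观地理解上面的 min/max 语义,下面给出一个仅作示意的 ElasticQuota 片段(apiVersion 与后文树形结构示例保持一致,数值为虚构):
+
+```yaml
+apiVersion: scheduling.sigs.k8s.io/v1alpha1
+kind: ElasticQuota
+metadata:
+  name: quota-example
+  namespace: default
+spec:
+  min:           # 必须保障的资源量
+    cpu: 10
+    memory: 20Gi
+  max:           # 允许使用的资源上限,min 与 max 之间的部分可按需弹性使用或借用
+    cpu: 40
+    memory: 80Gi
+```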
+
+ElasticQuota 还有一些局限性:没有很好地保障公平性。假如同一个 ElasticQuota 下有大量新建的 Pod,有可能会消耗所有其他可以被借用的 Quota,从而导致后来的 Pod 可能拿不到 Quota,此时只能通过抢占机制抢回来一些 Quota。另外,ElasticQuota 和 Kubernetes ResourceQuota 都是面向 Namespace 的,不支持多级树形结构,对于一些本身具备复杂组织关系的企业/组织,不能很好地使用 ElasticQuota/Kubernetes ResourceQuota 完成额度管理工作。
+
+Koordinator 针对这些额度管理问题,给出了一种基于社区 ElasticQuota 实现、支持多级管理方式的弹性 Quota 管理机制(multi hierarchy quota management),具备如下特性:
+
+- 兼容社区的 ElasticQuota API,用户可以无缝升级到 Koordinator。
+- 支持树形结构管理 Quota。
+- 支持按照共享权重(shared weight)保障公平性。
+- 允许用户设置是否允许把 Quota 借用给其他消费对象。
+
+#### Pod 关联 ElasticQuota 方式
+
+用户可以非常方便地使用该能力:可以完全按照 ElasticQuota 的用法,即每个 Namespace 设置一个 ElasticQuota 对象;也可以在 Pod 中追加 Label 关联 ElasticQuota:
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  labels:
+    quota.scheduling.koordinator.sh/name: "elastic-quota-a"
+  name: demo-pod
+  namespace: default
+spec:
+  ...
+```
+#### 树形结构管理机制和使用方法
+
+需要使用树形结构管理 Quota 时,需要在 ElasticQuota 中追加 Label `quota.scheduling.koordinator.sh/is-parent` 表示当前 ElasticQuota 是否是父节点,`quota.scheduling.koordinator.sh/parent` 表示当前 ElasticQuota 的父节点 ElasticQuota 的名字。举个例子:
![image.png](/img/quota-tree.png)
我们创建一个 ElasticQuota parentA 作为根节点,资源总量为 CPU 100C、内存 200Gi,以及一个子节点 childA1:
+```yaml
+apiVersion: scheduling.sigs.k8s.io/v1alpha1
+kind: ElasticQuota
+metadata:
+  name: parentA
+  namespace: default
+  labels:
+    quota.scheduling.koordinator.sh/is-parent: "true"
+    quota.scheduling.koordinator.sh/allow-lent-resource: "true"
+spec:
+  max:
+    cpu: 100
+    memory: 200Gi
+  min:
+    cpu: 100
+    memory: 200Gi
+---
+apiVersion: scheduling.sigs.k8s.io/v1alpha1
+kind: ElasticQuota
+metadata:
+  name: childA1
+  namespace: default
+  labels:
+    quota.scheduling.koordinator.sh/is-parent: "false"
+    quota.scheduling.koordinator.sh/parent: "parentA"
+    quota.scheduling.koordinator.sh/allow-lent-resource: "true"
+spec:
+  max:
+    cpu: 40
+    memory: 100Gi
+  min:
+    cpu: 20
+    memory: 40Gi
+```
+
+在使用树形结构管理 ElasticQuota 时,有一些需要遵循的约束:
+
+1. 除了根节点,其他所有子节点的 min 之和要小于父节点的 min。
+2. 不限制子节点 max,允许子节点的 max 大于父节点的 max。考虑以下场景,集群中有 2 个 ElasticQuota 子树:dev-parent 和 production-parent,每个子树都有几个子 ElasticQuota。当 production-parent 忙时,我们可以通过只降低 dev-parent 的 max,限制 dev-parent 整棵子树的资源使用量,而不是降低 dev-parent 子树中每个子 ElasticQuota 的 max 限制用量。
+3. Pod 不能使用父节点 ElasticQuota。如果放开这个限制,会导致整个弹性 Quota 的机制变得异常复杂,暂时不考虑支持这种场景。
+4. 只有父节点可以挂子节点,不允许子节点挂子节点。
+5. 暂时不允许改变 ElasticQuota 的 `quota.scheduling.koordinator.sh/is-parent` 属性。
+
+我们将在下个版本中通过 webhook 机制实现这些约束。
+
+#### 公平性保障机制
+
+为了方便阅读和理解将要介绍的公平性保障机制,先明确几个新概念:
+
+- request 表示同一个 ElasticQuota 关联的所有 Pod 的资源请求量。如果一个 ElasticQuota A 的 request 小于 min,ElasticQuota B 的 request 大于 min,那么 ElasticQuota A 未使用的部分,即 min - request 剩余的量,会通过公平性保障机制借用给 ElasticQuota B。当 ElasticQuota A 需要使用这些被借走的量时,要求 ElasticQuota B 依据公平性保障机制归还给 ElasticQuota A。
+- runtime 表示 ElasticQuota 当前可以使用的实际资源量。如果 request 小于 min,runtime 等于 request,这也意味着需要遵循 min 语义,无条件满足 request;如果 request 大于 min,且 min 小于 max,公平性保障机制会把 runtime 分配在 min 与 max 之间,即 max >= runtime >= min。
+- shared-weight 表示一个 ElasticQuota 的竞争力,默认等于 ElasticQuota 的 max。
+
+通过几个例子为大家介绍公平性保障机制的运行过程。假设当前集群的 CPU 总量为 100C,并且有 4 个 ElasticQuota,如下图所示,绿色部分为 Request 量:A 当前的 request 为 5,B 当前的 request 为 20,C 当前的 request 为 30,D 当前的 request 为 70。
![image.png](/img/quota-init-example.png)
并且我们注意到, A, B, C, D 的 min 之和是60,剩下 40 个空闲额度, 同时 A 还可以借给 B, C, D 5个额度,所以一共有45个额度被B,C,D共享,根据各个ElasticQuota的 shared-weight,B,C,D分别对应60,50和80,计算出各自可以共享的量: + +- B 可以获取 14个额度, 45 * 60 / (60 + 50 + 80) = 14 +- C 可以获取 12个额度, 45 * 50 / (60 + 50 + 80) = 12 +- D 可以获取 19个额度, 45 * 80 / (60 + 50 + 80) = 19 + +![image.png](/img/quota-init-runtime-example.png)
但我们也要注意的是,C 和 D 需要更多额度,而 B 只需要 5 个额度就能满足 Request,并且 B 的 min 是 15,也就意味着我们只需要给 B 5 个额度,剩余的 9 个额度继续分给 C 和 D。
+
+- C 可以获取 3个额度, 9 * 50 / (50 + 80) = 3
+- D 可以获取 6个额度, 9 * 80 / (50 + 80) = 6
+
+[![](https://github.com/koordinator-sh/koordinator/raw/main/docs/images/runtimequota3.jpg)](https://github.com/koordinator-sh/koordinator/blob/main/docs/images/runtimequota3.jpg)
最终我们得出如下的分配结果:
+
+- A runtime = 5
+- B runtime = 20
+- C runtime = 35
+- D runtime = 40
+
+[![](https://github.com/koordinator-sh/koordinator/raw/main/docs/images/runtimequota4.jpg)](https://github.com/koordinator-sh/koordinator/blob/main/docs/images/runtimequota4.jpg)
总结整个过程可以知道:
+
+1. 当 request < min 时,需要借出 lent-to-quotas;当 request > min 时,需要借入 borrowed-quotas。
+2. 统计所有 runtime < min 的 Quota,这些总量就是接下来可被借出的量。
+3. 根据 shared-weight 计算每个 ElasticQuota 可以借入的量。
+4. 如果最新的 runtime > request,那么 runtime - request 剩余的量可以借给更需要的对象。
+
+另外还有一种日常生产时会遇到的情况:集群内资源总量会随着节点故障、资源运营等原因降低,导致所有 ElasticQuota 的 min 之和大于资源总量。当出现这种情况时,我们无法确保 min 的资源诉求。此时我们会按照一定的比例调整各个 ElasticQuota 的 min,确保所有 min 之和小于或者等于当前实际的资源总量。
+
+#### 抢占机制
+
+Koordinator ElasticQuota 机制在调度阶段如果发现 Quota 不足,会进入抢占阶段,按照优先级排序,抢占属于同一个 ElasticQuota 内的低优先级 Pod。同时,我们不支持跨 ElasticQuota 抢占其他 Pod,但是我们也提供了另外的机制,支持从借用 Quota 的 ElasticQuota 抢回。
举个例子,在集群中,有两个 ElasticQuota,ElasticQuota A {min = 50, max = 100}, ElasticQuota B {min = 50, max = 100}。用户在上午10点使用 ElasticQuota A 提交了一个 Job, Request = 100 ,此时因为 ElasticQuota B 无人使用,ElasticQuota A 能从 B 手里借用50个Quota,满足了 Request = 100, 并且此时 Used = 100。在11点钟时,另一个用户开始使用 ElasticQuota B 提交Job,Request = 100,因为 ElasticQuota B 的 min = 50,是必须保障的,通过公平性保障机制,此时 A 和 B 的 runtime 均为50。那么此时对于 ElasticQuota A ,Used = 100 是大于当前 runtime = 50 的,因此我们会提供一个 Controller,驱逐掉一部分 Pod ,使得当前 ElasticQuota A 的 Used 降低到 runtime 相等的水位。 + +## 2. 精细化资源调度 + +### Device Share Scheduling + +机器学习领域里依靠大量强大算力性能的 GPU 设备完成模型训练,但是 GPU 自身价格十分昂贵。如何更好地利用GPU设备,发挥GPU的价值,降低成本,是一个亟待解决的问题。 Kubernetes 社区现有的 GPU 分配机制中,GPU 是由 kubelet 分配的,并只支持分配一个或多个完整的 GPU 实例。 这种方法简单可靠,但类似于 CPU 和 Memory,GPU 并不是一直处于高利用率水位,同样存在资源浪费的问题。 因此,Koordinator 希望支持多工作负载共享使用 GPU 设备以节省成本。 此外,GPU 有其特殊性。 比如下面的 NVIDIA GPU 支持的 NVLink 和超卖场景,都需要通过调度器进行中央决策,以获得全局最优的分配结果。
![image.png](/img/nvlink.png) + +从图中我们可以发现,虽然该节点有8个 GPU 实例,型号为A100/V100,但 GPU 实例之间的数据传输速度是不同的。 当一个 Pod 需要多个 GPU 实例时,我们可以为 Pod 分配具有最大数据传输速度组合关系的 GPU 实例。 此外,当我们希望一组 Pod 中的 GPU 实例具有最大数据传输速度组合关系时,调度器应该将最佳 GPU 实例批量分配给这些 Pod,并将它们分配到同一个节点。 + +#### GPU 资源协议 + +Koordinator 兼容社区已有的 `nvidia.com/gpu`资源协议,并且还自定义了扩展资源协议,支持用户更细粒度的分配 GPU 资源。 + +- kubernetes.io/gpu-core 代表GPU的计算能力。 与 Kuberetes MilliCPU 类似,我们将 GPU 的总算力抽象为100,用户可以根据需要申请相应数量的 GPU 算力。 +- kubernetes.io/gpu-memory 表示 GPU 的内存容量,以字节为单位。 +- kubernetes.io/gpu-memory-ratio 代表 GPU 内存的百分比。 + +假设一个节点有4个GPU设备实例,每个GPU设备实例有 8Gi 显存。用户如果期望申请一个完整的 GPU 实例,除了使用 `nvidia.com/gpu`之外,还可以按照如下方式申请: +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: demo-pod + namespace: default +spec: + containers: + - name: main + resources: + limits: + kubernetes.io/gpu-core: 100 + kubernetes.io/gpu-memory: "8Gi" + requests: + kubernetes.io/gpu-core: 100 + kubernetes.io/gpu-memory: "8Gi" +``` + +如果期望只使用一个 GPU 实例一半的资源,可以按照如下方式申请: +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: demo-pod + namespace: default +spec: + containers: + - name: main + resources: + limits: + kubernetes.io/gpu-core: 50 + kubernetes.io/gpu-memory: "4Gi" + requests: + kubernetes.io/gpu-core: 50 + kubernetes.io/gpu-memory: "4Gi" +``` +#### 设备信息和设备容量上报 + +在 Koordinator v0.7.0 版本中,单机侧 koordlet 安装后会自动识别节点上是否含有 GPU 设备,如果存在的话,会上报这些 GPU 设备的 Minor ID、 UUID、算力和显存大小到一个类型为 Device CRD 中。每个节点对应一个 Device CRD 实例。Device CRD 不仅支持描述 GPU,还支持类似于 FPGA/RDMA等设备类型,目前 v0.7.0 版本只支持 GPU, 暂未支持这些设备类型。
Device CRD 会被 koord-manager 内的 NodeResource controller 和 koord-scheduler 消费。NodeResource controller 会根据 Device CRD 中描述的信息,换算成 Koordinator 支持的资源协议 `kubernetes.io/gpu-core`,`kubernetes.io/gpu-memory` 更新到 Node.Status.Allocatable 和 Node.Status.Capacity 字段,帮助调度器和 kubelet 完成资源调度。gpu-core 表示GPU 设备实例的算力,一个实例的完整算力为100。假设一个节点有 8 个 GPU 设备实例,那么节点的 gpu-core 容量为 8 * 100 = 800; gpu-memory 表示 GPU 设备实例的显存大小,单位为字节,同样的节点可以分配的显存总量为 设备数量 * 每个实例的单位容量,例如一个 GPU 设备的显存是 8G,节点上有8 个 GPU 实例,总量为 8 * 8G = 64G。 +```yaml +apiVersion: v1 +kind: Node +metadata: + name: node-a +status: + capacity: + koordinator.sh/gpu-core: 800 + koordinator.sh/gpu-memory: "64Gi" + koordinator.sh/gpu-memory-ratio: 800 + allocatable: + koordinator.sh/gpu-core: 800 + koordinator.sh/gpu-memory: "64Gi" + koordinator.sh/gpu-memory-ratio: 800 +``` + +#### 中心调度分配设备资源 + +Kuberetes 社区原生提供的设备调度机制中,调度器只负责校验设备容量是否满足 Pod,对于一些简单的设备类型是足够的,但是当需要更细粒度分配 GPU 时,需要中心调度器给予支持才能实现全局最优。
Koordinator 调度器 koord-scheduler 新增了调度插件 DeviceShare,负责精细度设备资源调度。DeviceShare 插件消费 Device CRD,记录每个节点可以分配的设备信息。DeviceShare 在调度时,会把 Pod 的GPU资源请求转换为 Koordinator 的资源协议,并过滤每个节点的未分配的 GPU 设备实例。确保有资源可用后,在 Reserve 阶段更新内部状态,并在 PreBind 阶段更新 Pod Annotation,记录当前 Pod 应该使用哪些 GPU 设备。
DeviceShare 将在后续版本支持 Binpacking 和 Spread 策略,实现更好的设备资源调度能力。 + +#### 单机侧精准绑定设备信息 + +Kubernetes 社区在 kubelet 中提供了 DevicePlugin 机制,支持设备厂商在 kubelet 分配好设备后有机会获得设备信息,并填充到环境变量或者更新挂载路径。但是不能支持 中心化的 GPU 精细化调度场景。
针对这个问题, Koordinator 扩展了 koord-runtime-proxy ,支持在 kubelet 创建容器时更新环境变量,注入调度器分配的 GPU 设备信息。
![](/img/koordlet-inject-env.jpeg) + +## 3. 调度器诊断分析 + +大家在使用 Kubernetes 时经常会遇到一些调度相关的问题: + +1. 我这个 Pod 为什么不能调度? +2. 这个 Pod 为什么会调度到这个节点,不是应该被另一个打分插件影响到么? +3. 我新开发了一个插件,发现调度结果不符合预期,但是有不知道哪里出了问题。 + +要诊断分析这些问题,除了要掌握 Kubernetes 基本的调度机制和资源分配机制外,还需要调度器自身给予支持。但是 Kubernetes kube-scheduler 提供的诊断能力比较有限,有时候甚至没有什么日志可以查看。kube-scheduler 原生是支持通过 HTTP 更改日志等级,可以获得更多日志信息,例如执行如下命令可以更改日志等级到5: +```bash +$ curl -X PUT schedulerLeaderIP:10251/debug/flags/v --data '5' +successfully set klog.logging.verbosity to 5 +``` + +Koordinator 针对这些问题,实现了一套 Restful API ,帮助用户提升问题诊断分析的效率 + +### 分析 Score 结果 + +`PUT /debug/flags/s` 允许用户打开 Debug Score 开关,在打分结束后,以Markdown 格式打印 TopN 节点各个插件的分值。例如: +```bash +$ curl -X PUT schedulerLeaderIP:10251/debug/flags/s --data '100' +successfully set debugTopNScores to 100 +``` + 当有新 Pod 调度时,观察 scheduler log 可以看到如下信息 +```bash +| # | Pod | Node | Score | ImageLocality | InterPodAffinity | LoadAwareScheduling | NodeAffinity | NodeNUMAResource | NodeResourcesBalancedAllocation | NodeResourcesFit | PodTopologySpread | Reservation | TaintToleration | +| --- | --- | --- | ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| +| 0 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.51 | 577 | 0 | 0 | 87 | 0 | 0 | 96 | 94 | 200 | 0 | 100 | +| 1 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.50 | 574 | 0 | 0 | 85 | 0 | 0 | 96 | 93 | 200 | 0 | 100 | +| 2 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.19 | 541 | 0 | 0 | 55 | 0 | 0 | 95 | 91 | 200 | 0 | 100 | +| 3 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.18 | 487 | 0 | 0 | 15 | 0 | 0 | 90 | 82 | 200 | 0 | 100 | + +``` + +找个 Markdown 工具,就可以转为如下表格 + +| # | Pod | Node | Score | LoadAwareScheduling | NodeNUMAResource | NodeResourcesFit | PodTopologySpread | +| --- | --- | --- | --- | --- | --- | --- | --- | +| 0 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.51 | 577 | 87 | 0 | 94 | 200 | +| 1 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.50 | 574 | 85 | 0 | 93 | 200 | +| 2 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.19 | 541 | 55 | 0 | 91 | 200 | +| 3 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.18 | 487 | 15 | 0 | 82 | 200 | + +### 调度插件导出内部状态 + +像 koord-scheduler 内部的 NodeNUMAResource 、 DeviceShare和ElasticQuota等插件内部都有维护一些状态帮助调度。 koord-scheduler 自定义了一个新的插件扩展接口(定义见下文),并会在初始化插件后,识别该插件是否实现了该接口并调用该接口,让插件注入需要暴露的 RestfulAPI。以 NodeNUMAResource 插件为例,会提供 `/cpuTopologyOptions/:nodeName`和 `/availableCPUs/:nodeName`两个Endpoints,可以查看插件内部记录的 CPU 拓扑信息和分配结果。 + +```go +type APIServiceProvider interface { + RegisterEndpoints(group *gin.RouterGroup) +} +``` + +用户在使用时,按照 `/apis/v1/plugins//`方 式构建 URL 查看数据,例如要查看 `/cpuTopologyOptions/:nodeName`: + +```bash +$ curl schedulerLeaderIP:10252/apis/v1/plugins/NodeNUMAResources/cpuTopologyOptions/node-1 +{"cpuTopology":{"numCPUs":32,"numCores":16,"numNodes":1,"numSockets":1,"cpuDetails":.... +``` + +### 查看当前支持的插件 API + +为了方便大家使用,koord-scheduler 提供了 `/apis/v1/__services__` 查看支持的 API Endpoints +```bash +$ curl schedulerLeaderIP:10251/apis/v1/__services__ +{ + "GET": [ + "/apis/v1/__services__", + "/apis/v1/nodes/:nodeName", + "/apis/v1/plugins/Coscheduling/gang/:namespace/:name", + "/apis/v1/plugins/DeviceShare/nodeDeviceSummaries", + "/apis/v1/plugins/DeviceShare/nodeDeviceSummaries/:name", + "/apis/v1/plugins/ElasticQuota/quota/:name", + "/apis/v1/plugins/NodeNUMAResource/availableCPUs/:nodeName", + "/apis/v1/plugins/NodeNUMAResource/cpuTopologyOptions/:nodeName" + ] +} +``` + +## 4. 
更安全的重调度 + +在 Koordinator v0.6 版本中我们发布了全新的 koord-descheduler,支持插件化实现需要的重调度策略和自定义驱逐机制,并内置了面向 PodMigrationJob 的迁移控制器,通过 Koordinator Reservation 机制预留资源,确保有资源的情况下发起驱逐。解决了 Pod 被驱逐后无资源可用影响应用的可用性问题。
Koordinator v0.7 版本中,koord-descheduler 实现了更安全的重调度 + +- 支持 Evict 限流,用户可以根据需要配置限流策略,例如允许每分钟驱逐多少个 Pod +- 支持配置 Namespace 灰度重调度能力,让用户可以更放心的灰度 +- 支持按照 Node/Namespace 配置驱逐数量,例如配置节点维度最多只驱逐两个,那么即使有插件要求驱逐该节点上的更多Pod,会被拒绝。 +- 感知 Workload ,如果一个 Workload 正在发布、缩容、已经有一定量的 Pod 正在被驱逐或者一些Pod NotReady,重调度器会拒绝新的重调度请求。目前支持原生的 Deployment,StatefulSet 以及 Kruise CloneSet,Kruise AdvancedStatefulSet。 + +后续重调度器还会提升公平性,防止一直重复的重调度同一个 workload ,尽量降低重调度对应用的可用性的影响。 + +## 5. 其他改动 + +- Koordinator 进一步增强了 CPU 精细化调度能力,完全兼容 kubelet ( <= v1.22) CPU Manager static 策略。调度器分配 CPU 时会避免分配被 kubelet 预留的 CPU,单机侧koordlet完整适配了kubelet从1.18到1.22版本的分配策略,有效避免了 CPU 冲突。 +- 资源预留机制支持 AllocateOnce 语义,满足单次预留场景。并改进了 Reservation 状态语义,更加准确描述 Reservation 对象当前的状态。 +- 改进了离线资源(Batch CPU/Memory) 的声明方式,支持limit大于request的资源描述形式,可以方便原burstable类型的任务直接转换为混部模式运行。 + +你可以通过 [Github release[6]](https://github.com/koordinator-sh/koordinator/releases/tag/v0.6.1) 页面,来查看更多的改动以及它们的作者与提交记录。 + +# 相关链接 + +- [[1] Koordinator](https://koordinator.sh) +- [[2] Koordinator 0.6 Release Note](https://mp.weixin.qq.com/s/YdoxVxz_91ZFemF8JuxRvQ) +- [[3] Design: Gang Scheduling](https://github.com/koordinator-sh/koordinator/blob/main/docs/proposals/scheduling/20220901-gang-scheduling.md) +- [[4] Design: Multi Hierarchy ElasticQuota Management](https://github.com/koordinator-sh/koordinator/blob/main/docs/proposals/scheduling/20220722-multi-hierarchy-elastic-quota-management.md) +- [[5] Design: Fine-grained Device Scheduling](https://github.com/koordinator-sh/koordinator/blob/main/docs/proposals/scheduling/20220629-fine-grained-device-scheduling.md) +- [[6] Github Release](https://github.com/koordinator-sh/koordinator/releases/tag/v0.6.1) +- [[7] Slack Channel](https://join.slack.com/t/koordinator-sh/shared_invite/zt-1756qoub4-Cn4~esfdlfAPsD7cwO2NzA) +- [[8] 云原生混部系统 Koordinator 架构详解](https://mp.weixin.qq.com/s/y8k_q6rhTIubQ-lqvDp2hw) diff --git a/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-11-03-release/index.md b/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-11-03-release/index.md new file mode 100644 index 000000000..ef4d66933 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-blog/2022-11-03-release/index.md @@ -0,0 +1,78 @@ +--- +slug: release-v1.0.0 +title: "Koordinator v1.0: 正式发布" +authors: [joseph] +tags: [release] +--- + +Koordinator 今年3月份开源以来,先后发布了7个版本,逐步的把阿里巴巴&阿里云内部的混部系统的核心能力输出到开源社区,并在中间过程中逐渐的被 Kubernetes、大数据、高性能计算、机器学习领域或者社区的关注,Koordinator 社区也逐步获得了一些贡献者的支持,并有一些企业开始逐步的在生产环境中使用 Koordinator 解决实际生产中遇到的成本问题、混部问题等。 经过 Koordinator 社区的努力,我们怀着十分激动的心情向大家宣布 Koordinator 1.0 版本正式发布。 + +Koordinator 项目早期着重建设核心混部能力 -- 差异化 SLO,并且为了让用户更容易的使用 Koordinator 的混部能力,Koordinator 提供了 ClusterColocationProfile 机制帮助用户可以不用修改存量代码完成不同工作负载的混部,让用户逐步的熟悉混部技术。随后 Koordinaor 逐步在节点侧 QoS 保障机制上做了增强,提供了包括但不限于 CPU Suppress、CPU Burst、 Memory QoS、L3 Cache/MBA 资源隔离机制和基于满足度驱逐机制等多种能力,解决了大部分节点侧工作负载的稳定性问题。配合使用 Koordinator Runtime Proxy 组件,可以更好的兼容 Kubernetes kubelet 原生管理机制。 + +并且 Koordinator 在任务调度和 QoS 感知调度以及重调度等方面也都提供了一些创新方案,建设了全面兼容 Kubernetes CPU 管理机制的精细化 CPU 调度能力,面向节点实际负载的均衡调度能力。为了更好的让用户管理好资源, Koordinator 还提供了资源预留能力(Reservation),并且 Koordinator 基于 Kubernetes 社区已有的Coscheduling、ElasticQuota Scheduling 能力做了进一步的增强,为任务调度领域注入了新的活力。Koordinator 提供了全新的重调度器框架,着重建设 Descheduler 的扩展性和安全性问题。 + +# 安装或升级 Koordinator v1.0.0 + +## 使用 Helm 安装 + +您可以通过 helm v3.5+ 非常方便的安装 Koordinator,Helm 是一个简单的命令行工具,您可以从 [这里](https://github.com/helm/helm/releases) 获取它。 + +```shell +# Firstly add koordinator charts repository if you haven't do this. 
+$ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/ + +# [Optional] +$ helm repo update + +# Install the latest version. +$ helm install koordinator koordinator-sh/koordinator --version 1.0.0 +``` + +# 版本功能特性解读 + +Koordinator v1.0 整体新增的特性并不多,主要有以下一些变化 + +## 独立 API Repo + +为了更方便集成和使用 Koordiantor 定义的 API,并避免因依赖 Koordiantor 引入额外的依赖或者依赖冲突问题,我们建立了独立的 API Repo: [koordinator-sh/apis](https://github.com/koordinator-sh/apis) + +## 新增 ElasticQuota Webhook + +在 Koordinator v0.7 版本中,我们基于 Kubernetes sig-scheduler 提供的 ElasticQuota 做了诸多增强,提供了树形管理机制,并提供了公平性保障机制等,可以很好的帮助您解决使用 ElasticQuota 遇到的问题。在 Koordinator v1.0 版本中,我们进一步提供了 ElasticQuota Webhook,帮助您在使用 ElasticQuota 树形管理机制时,保障新的 ElasticQuota 对象遵循 Koordinator 定义的规范或约束: + +1. 除了根节点,其他所有子节点的 min 之和要小于父节点的 min。 +2. 不限制子节点 max,允许子节点的 max 大于父节点的 max。考虑以下场景,集群中有 2 个 ElasticQuota 子树:dev-parent 和 production-parent,每个子树都有几个子 ElasticQuota。 当 production-parent 忙时,我们可以通过只降低 dev-parent 的 max 限制 dev-parent 整颗子树的资源使用量,而不是降低 dev-parent 子树的每个子 ElasticQuota 的max限制用量。 +3. Pod 不能使用父节点ElasticQuota。如果放开这个限制,会导致整个弹性 Quota 的机制变的异常复杂,暂时不考虑支持这种场景。 +4. 只有父节点可以挂子节点,不允许子节点挂子节点 +5. 暂时不允许改变 ElasticQuota 的 `quota.scheduling.koordinator.sh/is-parent`属性 + +## 进一步完善 ElasticQuota Scheduling + +在 Koordinator v0.7 版本中,koord-scheduler 的主副 Pod 都会启动 ElasticQuota Controller 并都会更新 ElasticQuota 对象。在 Koordinator v1.0 中我们修复了该问题,确保只有主 Pod 可以启动 Controller 并更新 ElasticQuota 对象。 还优化了 ElasticQuota Controller 潜在的频繁更新 ElasticQuota 对象的问题,当检查到 ElasticQuota 各维度数据发生变化时才会更新,降低频繁更新给 APIServer 带来的压力。 + +## 进一步完善 Device Share Scheduling + +Koordinator v1.0 中 koordlet 会上报 GPU 的型号和驱动版本到 Device CRD 对象中,并会由 koord-manager 同步更新到 Node 对象,追加相应的标签。 + +```yaml +apiVersion: v1 +kind: Node +metadata: + labels: + kubernetes.io/gpu-driver: 460.91.03 + kubernetes.io/gpu-model: Tesla-T4 + ... + name: cn-hangzhou.10.0.4.164 +spec: + ... +status: + ... +``` + +## Koordinator Runtime Proxy 增强兼容性 + +在 Koordinator 之前的版本中,koord-runtime-proxy 和 koordlet 一起安装后,如果 koordlet 异常或者 koordlet 卸载/重装等场景下,会遇到新调度到节点的 Pod 无法创建容器的问题。为了解决这个问题,koord-runtime-proxy 会感知 Pod 是否具有特殊的 label `runtimeproxy.koordinator.sh/skip-hookserver=true`,如果 Pod 存在该标签,koord-runtime-proxy 会直接把 CRI 请求转发给 containerd/docker 等 runtime。 + +## 其他改动 + +你可以通过 [Github release](https://github.com/koordinator-sh/koordinator/releases/tag/v1.0.0) 页面,来查看更多的改动以及它们的作者与提交记录。 diff --git a/i18n/zh-Hans/docusaurus-plugin-content-blog/2023-01-03-release/index.md b/i18n/zh-Hans/docusaurus-plugin-content-blog/2023-01-03-release/index.md new file mode 100644 index 000000000..67a85aa1a --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-blog/2023-01-03-release/index.md @@ -0,0 +1,158 @@ +--- +slug: release-v1.1.0 +title: "Koordinator v1.1: 让调度感知负载与干扰检测采集" +authors: [FillZpp] +tags: [release] +--- + +## 背景 + +Koordinator 旨在为用户提供完整的混部工作负载编排、混部资源调度、混部资源隔离及性能调优解决方案,帮助用户提高延迟敏感服务的运行性能,挖掘空闲节点资源并分配给真正有需要的计算任务,从而提高全局的资源利用效率。 + +从 2022 年 4 月发布以来,Koordinator 迄今一共迭代发布了 9 个版本。项目经历的大半年发展过程中,社区吸纳了包括阿里巴巴、小米、小红书、爱奇艺、360、有赞 等在内的大量优秀工程师,贡献了众多的想法、代码和场景,一起推动 Koordinator 项目的成熟。 + +今天,很高兴的宣布 Koordinator v1.1 正式发布,它包含了负载感知调度/重调度、cgroup v2 支持、干扰检测指标采集,以及其他一系列优化点。接下来我们就针对这些新增特性做深入解读与说明。 + +## 版本特性深入解读 + +### 负载感知调度 + +#### 支持按工作负载类型统计和均衡负载水位 + +Koordinator v1.0 及之前的版本,提供了负载感知调度提供基本的利用率阈值过滤保护高负载水位的节点继续恶化影响工作负载的运行时质量,以及通过预估机制解决解决冷节点过载的情况。已有的负载感知调度能解决很多常见场景的问题。但负载感知调度作为一种优化手段,还有比较多的场景是需要完善的。 + +目前的负载感知调度主要解决了集群内整机维度的负载均衡效果,但有可能出现一些特殊的情况:节点部署了不少离线Pod运行,拉高了整机的利用率,但在线应用工作负载的整体利用率偏低。这个时候如果有新的在线Pod,且整个集群内的资源比较紧张时,会有如下的问题: + +1. 有可能因为整机利用率超过整机安全阈值导致无法调度到这个节点上的; +2. 
还可能出现一个节点的利用率虽然相对比较低,但上面跑的全是在线应用率,从在线应用角度看,利用率已经偏高了,但按照当前的调度策略,还会继续调度这个Pod上来,导致该节点堆积了大量的在线应用,整体的运行效果并不好。 + +在 Koordinator v1.1 中,koord-scheduler 支持感知工作负载类型,区分不同的水位和策略进行调度。 + +在 Filter 阶段,新增 threshold 配置 `prodUsageThresholds`,表示在线应用的安全阈值,默认为空。如果当前调度的 Pod 是 Prod 类型,koord-scheduler 会从当前节点的 NodeMetric 中统计所有在线应用的利用率之和,如果超过了 `prodUsageThresholds` 就过滤掉该节点;如果是离线 Pod,或者没有配置 `prodUsageThresholds`,保持原有的逻辑,按整机利用率处理。 + +在 Score 阶段,新增开关 `scoreAccordingProdUsage` 表示是否按 Prod 类型的利用率打分均衡。默认不启用。当开启后,且当前 Pod 是 Prod 类型的话,koord-scheduler 在预估算法中只处理 Prod 类型的 Pod,并对 NodeMetrics 中记录的其他的未使用预估机制处理的在线应用的 Pod 的当前利用率值进行求和,求和后的值参与最终的打分。如果没有开启 `scoreAccordingProdUsage`,或者是离线Pod,保持原有逻辑,按整机利用率处理。 + +#### 支持按百分位数利用率均衡 + +Koordinator v1.0及以前的版本都是按照 koordlet 上报的平均利用率数据进行过滤和打分。但平均值隐藏了比较多的信息,因此在 Koordinator v1.1 中 koordlet 新增了根据百分位数统计的利用率聚合数据。调度器侧也跟着做了相应的适配。 + +更改调度器的 LoadAware 插件的配置,`aggregated` 表示按照百分位数聚合数据进行打分和过滤。`aggregated.usageThresholds` 表示过滤时的水位阈值;`aggregated.usageAggregationType` 表示过滤阶段要使用的百分位数类型,支持 `avg`,`p99`, `p95`, `p90` 和 `p50`;`aggregated.usageAggregatedDuration` 表示过滤阶段期望使用的聚合周期,如果不配置,调度器将使用 NodeMetrics 中上报的最大周期的数据;`aggregated.scoreAggregationType` 表示在打分阶段期望使用的百分位数类型;`aggregated.scoreAggregatedDuration` 表示打分阶段期望使用的聚合周期,如果不配置,调度器将使用 NodeMetrics 中上报的最大周期的数据。 + +在 Filter 阶段,如果配置了 `aggregated.usageThresholds` 以及对应的聚合类型,调度器将按该百分位数统计值进行过滤; + +在 Score 阶段,如果配置了 `aggregated.scoreAggregationType`,调度器将会按该百分位数统计值打分;目前暂时不支持 Prod Pod 使用百分位数过滤。 + +### 负载感知重调度 + +Koordinator 在过去的几个版本中,持续的演进重调度器,先后了开源完整的框架,加强了安全性,避免因过度驱逐 Pod 影响在线应用的稳定性。这也影响了重调度功能的进展,过去 Koordinator 暂时没有太多力量建设重调度能力。这一情况将会得到改变。 + +Koordinator v1.1 中我们新增了负载感知重调度功能。新的插件称为 `LowNodeLoad`,该插件配合着调度器的负载感知调度能力,可以形成一个闭环,调度器的负载感知调度在调度时刻决策选择最优节点,但随着时间和集群环境以及工作负载面对的流量/请求的变化时,负载感知重调度可以介入进来,帮助优化负载水位超过安全阈值的节点。 `LowNodeLoad` 与 K8s descheduler 的插件 LowNodeUtilization 不同的是,LowNodeLoad是根据节点真实利用率的情况决策重调度,而 LowNodeUtilization 是根据资源分配率决策重调度。 + +`LowNodeLoad` 插件有两个最重要的参数,分别是 `highThresholds` 和 `lowThresholds`: + +- `highThresholds` 表示负载水位的警戒阈值,超过该阈值的节点上的Pod将参与重调度; +- `lowThresholds` 表示负载水位的安全水位。低于该阈值的节点上的Pod不会被重调度。 + +以下图为例,lowThresholds 为45%,highThresholds 为 70%,那么低于 45% 的节点是安全的,因为水位已经很低了;高于45%,但是低于 70%的是区间是我们期望的负载水位范围;高于70%的节点就不安全了,应该把超过70%的这部分(假设当前节点A的负载水位是85%),那么 85% - 70% = 15% 的负载降低,筛选 Pod 后执行迁移。 + +![LowNodeLoad 示例](/img/lownodeload-sample.png) + +迁移时,还要考虑到低于 45% 的这部分节点是我们重调度后要承载新Pod的节点,我们需要确保迁移的Pod的负载总量不会超过这些低负载节点的承载上限。这个承载上限即是 highThresholds - 节点当前负载,假设节点B的负载水位是20%,那么 70%-20% = 50%,这50%就是可以承载的容量了。因此迁移时每驱逐一个 Pod,这个承载容量就应该扣掉当前重调度 Pod 的当前负载或者预估负载或者画像值(这部分值与负载调度里的值对应)。这样就可以确保不会多迁移。 + +如果一个集群总是可能会出现某些节点的负载就是比较高,而且数量并不多,这个时候如果频繁的重调度这些节点,也会带来安全隐患,因此可以让用户按需设置 `numberOfNodes`。 + +另外,`LowNodeLoad` 识别出超过阈值的节点后会筛选 Pod,当筛选 Pod 时,可以配置要支持或者过滤的 namespace,或者配置 pod selector 筛选,也可以配置 `nodeFit` 检查每个备选 Pod 对应的 Node Affinity/Node Selector/Toleration 是否有与之匹配的 Node,如果没有的话,这种节点也会被忽略。当然可以考虑不启用这个能力,通过配置 `nodeFit` 为 false 后即可禁用,此时完全由底层的 `MigrationController` 通过 Koordinator Reservation 预留资源; + +当筛选出 Pod 后,会对这些 Pod 进行排序。会依靠Koordinator QoSClass、Kubernetes QoSClass、Priority、用量和创建时间等多个维度排序。 + +### cgroup v2 支持 + +#### 背景 + +Koordinator 中众多单机 QoS 能力和资源压制/弹性策略构建在 Linux Control Group (cgroups) 机制上,比如 CPU QoS (cpu)、Memory QoS (memory)、CPU Burst (cpu)、CPU Suppress (cpu, cpuset),koordlet 组件可以通过 cgroups (v1) 限制容器可用资源的时间片、权重、优先级、拓扑等属性。Linux 高版本内核也在持续增强和迭代了 cgroups 机制,带来了 cgroups v2 机制,统一 cgroups 目录结构,改善 v1 中不同 subsystem/cgroup controller 之间的协作,并进一步增强了部分子系统的资源管理和监控能力。Kubernetes 自 1.25 起将 cgroups v2 作为 GA (general availability) 特性,在 Kubelet 中启用该特性进行容器的资源管理,在统一的 cgroups 层次下设置容器的资源隔离参数,支持 MemoryQoS 的增强特性。 + +![cgroup v1/v2 
结构](/img/cgroup-v1-and-v2.svg) + +在 Koordinator v1.1 中,单机组件 koordlet 新增对 cgroups v2 的支持,包括如下工作: + +- 重构了 Resource Executor 模块,以统一相同或近似的 cgroup 接口在 v1 和 v2 不同版本上的文件操作,便于 koordlet 特性兼容 cgroups v2 和合并读写冲突。 +- 在当前已开放的单机特性中适配 cgroups v2,采用新的 Resource Executor 模块替换 cgroup 操作,优化不同系统环境下的报错日志。 + +Koordinator v1.1 中大部分 koordlet 特性已经兼容 cgroups v2,包括但不限于: + +- 资源利用率采集 +- 动态资源超卖 +- Batch 资源隔离(BatchResource,废弃BECgroupReconcile) +- CPU QoS(GroupIdentity) +- Memory QoS(CgroupReconcile) +- CPU 动态压制(BECPUSuppress) +- 内存驱逐(BEMemoryEvict) +- CPU Burst(CPUBurst) +- L3 Cache 及内存带宽隔离(RdtResctrl) + +遗留的未兼容特性如 PSICollector 将在接下来的 v1.2 版本中进行适配,可以跟进 issue#407 获取最新进展。接下来的 Koordinator 版本中也将逐渐引入更多 cgroups v2 的增强功能,敬请期待。 + +#### 使用 cgroups v2 + +在 Koordinator v1.1 中,koordlet 对 cgroups v2 的适配对上层功能配置透明,除了被废弃特性的 feature-gate 以外,您无需变动 ConfigMap `slo-controller-config` 和其他 feature-gate 配置。当 koordlet 运行在启用 cgroups v2 的节点上时,相应单机特性将自动切换到 cgroups-v2 系统接口进行操作。 + +此外,cgroups v2 是 Linux 高版本内核(建议 >=5.8)的特性,对系统内核版本和 Kubernetes 版本有一定依赖。建议采用默认启用 cgroups v2 的 Linux 发行版以及 Kubernetes v1.24 以上版本。 + +更多关于如何启用 cgroups v2 的说明,请参照 Kubernetes 社区[文档](https://kubernetes.io/docs/concepts/architecture/cgroups/#using-cgroupv2)。 + +### 干扰检测指标采集 + +在真实的生产环境下,单机的运行时状态是一个“混沌系统”,资源竞争产生的应用干扰无法绝对避免。Koordinator 正在建立干扰检测与优化的能力,通过提取应用运行状态的指标,进行实时的分析和检测,在发现干扰后对目标应用和干扰源采取更具针对性的策略。 + +当前 Koordinator 已经实现了一系列 `Performance Collector`,在单机侧采集与应用运行状态高相关性的底层指标,并通过 Prometheus 暴露出来,为干扰检测能力和集群应用调度提供支持。 + +#### 指标采集 + +Performance Collector 由多个 feature-gate 进行控制,Koordinator 目前提供以下几个指标采集器: + +- `CPICollector`:用于控制 CPI 指标采集器。CPI:Cycles Per Instruction。指令在计算机中执行所需要的平均时钟周期数。CPI 采集器基于 Cycles 和 Instructions 这两个 Kernel PMU(Performance Monitoring Unit)事件以及 perf_event_open(2) 系统调用实现。 +- `PSICollector`:用于控制 PSI 指标采集器。PSI:Pressure Stall Information。表示容器在采集时间间隔内,因为等待 cpu、内存、IO 资源分配而阻塞的任务数。使用 PSI 采集器前,需要在 Anolis OS 中开启 PSI 功能,您可以参考[文档](https://help.aliyun.com/document_detail/155464.html)获取开启方法。 + +Performance Collector 目前是默认关闭的。您可以通过修改 Koordlet 的 feature-gates 项来使用它,此项修改不会影响其他 feature-gate + +``` +kubectl edit ds koordlet -n koordinator-system +``` + +```yaml +... +spec: + ... + spec: + containers: + - args: + ... + # modify here + # - -feature-gates=BECPUEvict=true,BEMemoryEvict=true,CgroupReconcile=true,Accelerators=true + - -feature-gates=BECPUEvict=true,BEMemoryEvict=true,CgroupReconcile=true,Accelerators=true,CPICollector=true,PSICollector=true +``` + +#### ServiceMonitor + +v1.1.0 版本的 Koordinator 为 Koordlet 增加了 ServiceMonitor 的能力,将所采集指标通过 Prometheus 暴露出来,用户可基于此能力采集相应指标进行应用系统的分析与管理。 + +ServiceMonitor 由 Prometheus 引入,故在 helm chart 中设置默认不开启安装,可以通过以下命令安装ServiceMonitor: + +``` +helm install koordinator https://... 
--set koordlet.enableServiceMonitor=true +``` + +部署后可在 Prometheus UI 找到该 Targets。 + +``` +# HELP koordlet_container_cpi Container cpi collected by koordlet +# TYPE koordlet_container_cpi gauge +koordlet_container_cpi{container_id="containerd://498de02ddd3ad7c901b3c80f96c57db5b3ed9a817dbfab9d16b18be7e7d2d047",container_name="koordlet",cpi_field="cycles",node="your-node-name",pod_name="koordlet-x8g2j",pod_namespace="koordinator-system",pod_uid="3440fb9c-423b-48e9-8850-06a6c50f633d"} 2.228107503e+09 +koordlet_container_cpi{container_id="containerd://498de02ddd3ad7c901b3c80f96c57db5b3ed9a817dbfab9d16b18be7e7d2d047",container_name="koordlet",cpi_field="instructions",node="your-node-name",pod_name="koordlet-x8g2j",pod_namespace="koordinator-system",pod_uid="3440fb9c-423b-48e9-8850-06a6c50f633d"} 4.1456092e+09 +``` + +可以期待的是,Koordinator 干扰检测的能力在更复杂的真实场景下还需要更多检测指标的补充,后续将在如内存、磁盘 IO 等其他诸多资源的指标采集建设方面持续发力。 + +### 其他更新点 + +通过 [v1.1 release](https://github.com/koordinator-sh/koordinator/releases/tag/v1.1.0) 页面,可以看到更多版本所包含的新增功能。 diff --git a/i18n/zh-Hans/docusaurus-plugin-content-blog/2023-04-07-release/index.md b/i18n/zh-Hans/docusaurus-plugin-content-blog/2023-04-07-release/index.md new file mode 100644 index 000000000..3c681c525 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-blog/2023-04-07-release/index.md @@ -0,0 +1,192 @@ +--- +slug: release-v1.2.0 +title: "Koordinator v1.2: 支持节点资源预留,兼容社区重调度策略" +authors: [zwzhang0107] +tags: [release] +--- + +## 背景 +Koordinator 是一个开源项目,基于阿里巴巴在容器调度领域多年累积的经验孵化诞生,可以提升容器性能,降低集群资源成本。通过混部、资源画像、调度优化等技术能力, +能够提高延迟敏感的工作负载和批处理作业的运行效率和可靠性,优化集群资源使用效率。 + +从 2022 年 4 月发布以来,Koordinator 迄今一共迭代发布了 10 个版本,吸引了了包括阿里巴巴、小米、小红书、爱奇艺、360、有赞 等在内的大量优秀工程师参与贡献。 +随着2023年春天的来临,Koordinator也迎来了它的一周年诞辰,在此我们很高兴的向大家宣布,Koordinator v1.2版本正式发布。新版本中Koordinator支持了节点资源预留功能, +并兼容了K8s社区的重调度策略,同时在单机侧增加了对AMD环境L3 Cache和内存带宽隔离的支持。 + +在新版本中,共有12位新加入的开发者参与到了Koordiantor社区的建设,他们是@Re-Grh,@chengweiv5,@kingeasternsun,@shelwinnn,@yuexian1234,@Syulin7,@tzzcfrank +@Dengerwei,@complone,@AlbeeSo,@xigang,@leason00,感谢以上开发者的贡献和参与。 + +## 版本功能特性解读 + +### 节点资源预留 +混部场景中包含的应用形态多种多样,除了已经完成云原生化的容器,还包含很多尚未完成容器化的应用,这部分应用会以进程的形式在宿主机上与K8s容器共同运行。 +为了减少K8s应用和其他类型应用在节点侧的资源竞争,Koordinator 支持将一部分资源预留,使其既不参与调度器的资源调度,也不参与节点侧的资源分配,达到资源分隔使用的效果。 +在v1.2版本中,Koordiantor已经支持CPU和内存资源维度的预留,并允许直接指定预留的CPU编号,具体如下。 + +#### 节点资源预留声明 +在Node上可以配置需要预留的资源量或具体的CPU编号,举例如下: +```yaml +apiVersion: v1 +kind: Node +metadata: + name: fake-node + annotations: # specific 5 cores will be calculated, e.g. 0, 1, 2, 3, 4, and then those core will be reserved. + node.koordinator.sh/reservation: '{"resources":{"cpu":"5"}}' +--- +apiVersion: v1 +kind: Node +metadata: + name: fake-node + annotations: # the cores 0, 1, 2, 3 will be reserved. 
+ node.koordinator.sh/reservation: '{"reservedCPUs":"0-3"}' +``` +单机组件Koordlet在上报节点资源拓扑信息时,会将具体预留的CPU编号更新到NodeResourceTopology对象的Annotation中。 + +#### 调度及重调度场景适配 +调度器在分配资源的过程中,涉及了多种情况的资源校验,包括Quota管理,节点容量校验,CPU拓扑校验等等,这些场景都需要增加对节点预留资源的考虑,例如,调度器在计算节点CPU容量时,需要将节点预留的资源进行扣除。 +``` +cpus(alloc) = cpus(total) - cpus(allocated) - cpus(kubeletReserved) - cpus(nodeAnnoReserved) +``` +此外,对于Batch混部超卖资源的计算同样需要将这部分资源扣除,而考虑到节点中还包括一部分系统进程的资源消耗,Koord-Manager在计算时会取节点预留和系统用量的最大值,具体为: +``` +reserveRatio = (100-thresholdPercent) / 100.0 +node.reserved = node.alloc * reserveRatio +system.used = max(node.used - pod.used, node.anno.reserved) +Node(BE).Alloc = Node.Alloc - Node.Reserved - System.Used - Pod(LS).Used +``` +对于重调度,各插件策略需要在节点容量、利用率计算等场景感知节点预留资源量,此外,若已经有容器占用了节点的预留资源,重调度需要考虑将其进行驱逐,确保节点容量得到正确管理, +避免资源竞争。这部分重调度相关的功能,我们将在后续版本进行支持,也欢迎广大爱好者们一起参与共建。 + +#### 单机资源管理 +对于LS类型的Pod,单机Koordlet组件会根据CPU分配情况动态计算共享CPU池,对于节点预留的CPU核心会将其排除在外,确保LS类型pod和其他非容器化的进程资源隔离。 +同时,对于单机相关的QoS策略,例如CPUSuppress压制策略在计算节点利用率时,会将预留资源量考虑在内。 +``` +suppress(BE) := node.Total * SLOPercent - pod(LS).Used - max(system.Used, node.anno.reserved) +``` +关于节点资源预留功能的详细说明,可以参考 [设计文档](https://github.com/koordinator-sh/koordinator/blob/main/docs/proposals/scheduling/20221227-node-resource-reservation.md) 中的介绍。 + +### 兼容社区重调度策略 + +得益于 Koordinator Descheduler 的框架日益成熟,在 Koordinator v1.2 版本中,通过引入一种接口适配机制,可以无缝的对 Kubernetes Desceheduler 已有插件进行兼容,在使用时您只需部署 Koordinator Descheduler 即可使用到上游的全部功能。 + +在实现上,Koordinator Descheduler 通过 import 上游代码不做任何侵入式的改动,保证完全兼容上游所有的插件、参数配置以及其运行策略。同时,Koordinator 允许用户为上游插件指定增强的 evictor,从而复用 Koordinator 提供的资源预留、工作负载可用性保障以及全局流控等安全性策略。 + +兼容的插件列表包括: +- HighNodeUtilization +- LowNodeUtilization +- PodLifeTime +- RemoveFailedPods +- RemoveDuplicates +- RemovePodsHavingTooManyRestarts +- RemovePodsViolatingInterPodAntiAffinity +- RemovePodsViolatingNodeAffinity +- RemovePodsViolatingNodeTaints +- RemovePodsViolatingTopologySpreadConstraint +- DefaultEvictor + +在使用时,可以参考如下的方式配置,以 RemovePodsHavingTooManyRestarts 为例: + +```yaml +apiVersion: descheduler/v1alpha2 +kind: DeschedulerConfiguration +clientConnection: + kubeconfig: "/Users/joseph/asi/koord-2/admin.kubeconfig" +leaderElection: + leaderElect: false + resourceName: test-descheduler + resourceNamespace: kube-system +deschedulingInterval: 10s +dryRun: true +profiles: +- name: koord-descheduler + plugins: + evict: + enabled: + - name: MigrationController + deschedule: + enabled: + - name: RemovePodsHavingTooManyRestarts + pluginConfig: + - name: RemovePodsHavingTooManyRestarts + args: + apiVersion: descheduler/v1alpha2 + kind: RemovePodsHavingTooManyRestartsArgs + podRestartThreshold: 10 +``` + +### 资源预留调度能力增强 + +Koordinator 在比较早期的版本中引入了 Reservation 机制,通过预留资源并复用给指定特征的 Pod 使用,用于帮助解决资源交付确定性问题。 +例如重调度场景中期望被驱逐的 Pod 一定有资源可以使用,而不是被驱逐后无资源可用导致引起稳定性问题;又或者需要扩容时, +一些 PaaS 平台希望能够先确定是否满足应用调度编排的资源,再决定是否扩容,或者提前做一些预备工作等。 + +Koordinator Reservation 通过 CRD 定义,每个 Reservation 对象会在 koord-scheduler 内伪造成一个 Pod 进行调度, +这样的 Pod 我们称为 Reserve PodReserve Pod 就可以复用已有的调度插件和打分插件找到合适的节点,并最终在调度器内部状态中占据对应的资源。 +Reservation 在创建时都会指定预留的资源将来要给哪些 Pod 使用,可以指定具体某个 Pod,也可以指定某些 workload 对象,或者具备某些标签的 Pod 使用。 +当这些 Pod 通过 koord-scheduler 调度时,调度器会找到可以被该 Pod 使用的 Reservation 对象,并且优先使用 Reservation 的资源。 +并且 Reservation Status 中会记录被哪个 Pod 使用,以及 Pod Annotations 中也会记录使用了哪个 Reservation。 +Reservation 被使用后,会自动的清理内部状态,确保其他 Pod 不会因为 Reservation 导致无法调度。 + +在 Koordinator v1.2 中,我们做了大幅度的优化。首先我们放开了只能使用 Reservation 持有的资源的限制,允许跨出 Reservation 的资源边界, +既可以使用 Reservation 预留的资源,也可以使用节点上剩余的资源。而且我们通过非侵入式的方式扩展了 Kubernetes Scheduler Framework, +支持预留精细化资源,即可以预留 CPU 核和 
GPU 设备等。我们也修改了 Reservation 可以被重复使用的默认行为,改为 AllocateOnce, +即 Reservation 一旦被某个 Pod 使用,该 Reservation 会被废弃。这样的改动是考虑到,AllocateOnce 更能覆盖大部分场景,这样作为默认行为,大家在使用时会更简单。 + +### 支持AMD环境下的L3 Cache和内存带宽隔离 +在v0.3.0版本中,Koordiantor已经支持了Intel环境的L3 Cache和内存带宽隔离,在最新的1.2.0版本中我们新增了对AMD环境的支持。 +Linux内核L3 Cache和内存带宽隔离能力提供了统一的resctrl接口,同时支持Intel和AMD环境,主要区别在于,Intel提供的内存带宽隔离接口为百分比格式, +而AMD提供的内存带宽隔离接口为绝对值格式,具体如下。 +``` +# Intel Format +# resctrl schema +L3:0=3ff;1=3ff +MB:0=100;1=100 + +# AMD Format +# resctrl schema +L3:0=ffff;1=ffff;2=ffff;3=ffff;4=ffff;5=ffff;6=ffff;7=ffff;8=ffff;9=ffff;10=ffff;11=ffff;12=ffff;13=ffff;14=ffff;15=ffff +MB:0=2048;1=2048;2=2048;3=2048;4=2048;5=2048;6=2048;7=2048;8=2048;9=2048;10=2048;11=2048;12=2048;13=2048;14=2048;15=2048 +``` +接口格式包含两部分,L3表示对应的socket或CCD可用的“路数”(way),以16进制的数据格式表示,每个比特位表示一路 +MB表示对应的socket或CCD可以使用的内存带宽范围,Intel可选范围为0~100的百分比格式,AMD对应的为绝对值格式,单位为Gb/s,2048表示不限制。 +Koordiantor统一提供了百分比格式的接口,并自动感知节点环境是否为AMD,决定resctrl接口中填写的格式。 +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: koordinator-system +data: + resource-qos-config: |- + { + "clusterStrategy": { + "lsClass": { + "resctrlQOS": { + "enable": true, + "catRangeStartPercent": 0, + "catRangeEndPercent": 100, + "MBAPercent": 100 + } + }, + "beClass": { + "resctrlQOS": { + "enable": true, + "catRangeStartPercent": 0, + "catRangeEndPercent": 30, + "MBAPercent": 100 + } + } + } + } +``` + +### 其他功能 +通过 [v1.2 release](https://github.com/koordinator-sh/koordinator/releases/tag/v1.2.0) 页面,可以看到更多版本所包含的新增功能。 + +## 未来计划 +在接下来的版本中,Koordiantor重点规划了以下功能,具体包括: +- 硬件拓扑感知调度,综合考虑节点CPU、内存、GPU等多个资源维度的拓扑关系,在集群范围内进行调度优化。 +- 对重调度器的可观测性和可追溯性进行增强。 +- GPU资源调度能力的增强。 + +Koordinator 是一个开放的社区,非常欢迎广大云原生爱好者们通过各种方式一起参与共建,无论您在云原生领域是初学乍练还是驾轻就熟,我们都非常期待听到您的声音! diff --git a/i18n/zh-Hans/docusaurus-plugin-content-blog/2023-08-16-release/index.md b/i18n/zh-Hans/docusaurus-plugin-content-blog/2023-08-16-release/index.md new file mode 100644 index 000000000..1139fd22f --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-blog/2023-08-16-release/index.md @@ -0,0 +1,184 @@ +--- +slug: release-v1.3.0 +title: "Koordinator v1.3: 增强资源预留,支持 NRI,提供节点画像的 Mid 资源超卖" +authors: [saintube] +tags: [release] +--- + +## 背景 + +Koordinator 是一个开源项目,旨在基于阿里巴巴在容器调度领域的多年经验,提供一个完整的混部解决方案,包含混部工作负载编排、资源调度、资源隔离及性能调优等多方面能力,来帮助用户优化容器性能,充分发掘空闲物理资源,提升资源效率,增强延迟敏感型工作负载和批处理作业的运行效率和可靠性。 + +在此,我们很高兴地向各位宣布 Koordinator v1.3.0 版本的发布。自 2022 年 4 月发布 v0.1.0 版本以来,Koordinator 迄今迭代发布了共 11 个版本,吸引了了包括阿里巴巴、Intel、小米、小红书、爱奇艺、360、有赞等企业在内的大量优秀工程师参与贡献。在 v1.3.0 版本中,Koordinator 带来了 NRI (Node Resource Interface) 支持、Mid 资源超卖等新特性,并在资源预留、负载感知调度、DeviceShare 调度、负载感知重调度、调度器框架、单机指标采集和资源超卖框架等特性上进行了稳定性修复、性能优化与功能增强。 + +在 v1.3.0 版本中,共有 12 位新加入的开发者参与到了 Koordinator 社区的建设,他们是 @bowen-intel,@BUPT-wxq,@Gala-R,@haoyann,@kangclzjc,@Solomonwisdom,@stulzq,@TheBeatles1994,@Tiana2018,@VinceCui,@wenchezhao,@zhouzijiang,感谢期间各位社区同学的积极参与和贡献,也感谢所有同学在社区的持续投入。 + +## 版本功能特性解读 + +### 资源预留增强 + +资源预留(Reservation)能力自 v0.5.0 版本提出后,经历了一年的打磨和迭代,在 v1.3.0 版本中针对抢占、设备预留、Coscheduling 等场景增强了预留机制,新增 allocatePolicy 字段用于定义不同的预留资源分配策略。最新的资源预留 API 如下: + +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: Reservation +metadata: + name: reservation-demo +spec: + # template字段填写reservation对象的资源需求和affinity信息,就像调度pod一样. 
+ template: + namespace: default + spec: + containers: + - args: + - '-c' + - '1' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: + requests: + cpu: 500m + memory: 1Gi + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: topology.kubernetes.io/zone + operator: In + values: + - cn-hangzhou-i + schedulerName: koord-scheduler # 指定koord-scheduler来负责reservation对象的调度. + # 指定可分配预留资源的owners. + owners: + - labelSelector: + matchLabels: + app: app-demo + ttl: 1h + # 指定预留资源是否仅支持一次性的分配. + allocateOnce: true + # 指定预留资源的分配策略,当前支持以下策略: + # - Default: 缺省配置,不限制对预留资源的分配,pod优先分配自节点上的预留资源;如果预留资源不足,则继续分配节点空闲资源。 + # - Aligned: pod优先分配自节点上的预留资源;如果预留资源不足,则继续分配节点空闲资源,但要求这部分资源满足Pod需求。该策略可用于规避pod同时分配多个reservation的资源。 + # - Restricted: 对于预留资源包含的各个资源维度,pod必须分配自预留资源;其余资源维度可以分配节点空闲资源。包含了Aligned策略的语义。 + # 同一节点尚不支持Default策略和Aligned策略或Restricted策略共存。 + allocatePolicy: "Aligned" + # 控制预留资源是否可以使用 + unschedulable: false +``` + +此外,资源预留在 v1.3.0 中还包含了如下兼容性和性能优化: + +1. 增强 Reservation 的抢占,允许 Reservation 内的 Pod 间抢占,拒绝 Reservation 外的 Pod 抢占 Reservation 内的 Pod。 +2. 增强设备预留场景,如果节点上设备资源被部分预留并被 pod 使用,支持剩余资源的分配。 +3. 支持 Reservation 使用 Coscheduling。 +4. 新增 Reservation Affinity协议,允许用户一定从Reservation内分配资源。 +5. 优化 Reservation 兼容性,修复因 Reservation 导致原生打分插件失效的问题。 +6. 优化因引入 Reservation 导致的调度性能回归问题。 +7. 修复 Reservation 预留端口误删除的问题。 + +关于资源预留的设计,详见[Designs - Resource Reservation](/docs/designs/resource-reservation)。 + +### 其他调度增强 + +在 v1.3.0 中,koordinator 在调度和重调度方面还包含如下增强: + +1. DeviceShare 调度 + + - 更改 GPU 资源使用方式,使用 GPU Share API 时,必须声明`koordinator.sh/gpu-memory`或`koordinator.sh/gpu-memory-ratio`,允许不声明`koordinator.sh/gpu-core`。 + - 支持打分,可用于实现 GPU Share 场景和整卡分配场景的 bin-packing 或 spread,并支持卡粒度 binpacking 或 spread。 + - 修复用户误删除 Device CRD 导致调度器内部状态异常重复分配设备的问题。 + +2. 负载感知调度:修复对仅填写 Request 的 Pod 的调度逻辑。 + +3. 调度器框架:优化 PreBind 阶段的 Patch 操作,将多个插件的 Patch 操作合并为一次提交,提升操作效率,降低 APIServer 压力。 + +4. 重调度 + + - LowNodeLoad 支持按节点池设置不同的负载水位和参数等。自动兼容原有配置。 + - 跳过 schedulerName 不是 koord-scheduler 的Pod,支持配置不同的 schedulerName。 + +### NRI 资源管理模式 + +Koordinator 的 runtime hooks 支持两种模式,standalone 和 CRI proxy,然而这两种模式各自有着一些限制。当前,尽管在 standalone 模式做了很多优化,但当想获得更加及时的 Pod 或容器的事件或者环境变量的注入时还是需要依赖 proxy 模式。然而, proxy 模式要求单独部署 koord-runtime-proxy 组件来代理 CRI (Container Runtime Interface) 请求, 同时需要更改 Kubelet 的启动参数并重启 Kubelet。 + +NRI(Node Resource Interface),即节点资源接口,是 CRI 兼容的容器运行时插件扩展的通用框架,独立于具体的容器运行时(e.g. 
containerd, cri-o), 提供不同生命周期事件的接口,允许用户在不修改容器运行时源代码的情况下添加自定义逻辑。特别的是,2.0 版本 NRI 只需要运行一个插件实例用于处理所有 NRI 事件和请求,容器运行时通过 Unix-Domain Socket 与插件通信,使用基于 Protobuf 的协议数据,和 1.0 版本 NRI 相比拥有更高的性能,能够实现有状态的 NRI 插件。 + +通过 NRI 的引入,既能及时的订阅 Pod 或者容器的生命周期事件,又避免了对 Kubelet 的侵入修改。在 Koordinator v1.3.0 中,我们引入 NRI 这种社区推荐的方式来管理 runtime hooks 来解决之前版本遇到的问题,大大提升了 Koordinator 部署的灵活性和处理的时效性,提供了一种优雅的云原生系统的资源管理标准化模式。 + +![nri](/img/nri-proposal.png) + +> 注:NRI 模式不支持 docker 的容器运行时,使用 docker 的用户请继续使用 standalone 模式或 proxy 模式。 + +关于 Koordinator 启用 NRI 的部署方式,请见[Installation - Enable NRI Mode Resource Management](/docs/installation#enable-nri-mode-resource-management)。 + +### 节点画像和 Mid 资源超卖 + +Koordinator 中将节点资源分为4种资源优先级模型 Prod、Mid、Batch 和 Free,低优先级资源可以复用高优先级已分配但未使用的物理资源,以提升物理资源利用率;同时,资源优先级越高,提供的资源也越稳定,例如 Batch 资源采用高优先级资源短期(short-term)已分配但未使用的超卖资源,而 Mid 资源采用高优先级资源长周期(long-term)已分配但未使用的超卖资源。不同资源优先级模型如下图所示: + +![resource-priority-model](/img/resource-model.png) + +Koordinator v1.3.0 新增了节点画像能力,基于 Prod 的历史资源用量进行峰值预测,以支持 Mid-tier 的资源超卖调度。Mid 资源的超卖计算公式如下: + +``` +MidAllocatable := min(ProdReclaimable, NodeAllocatable * thresholdRatio) +ProdReclaimable := max(0, ProdAllocated - ProdPeak * (1 + safeMargin)) +``` + +- `ProdPeak`:通过节点画像,预估的节点上已调度 Prod Pod 在中长周期内(e.g. 12h)的用量峰值。 +- `ProdReclaimable`:基于节点画像结果,预估在中长周期内可复用的 Prod 资源。 +- `MidAllocatable`:节点上可分配的 Mid 资源。 + +此外,Mid 资源的单机隔离保障将在下个版本得到完善,相关动态敬请关注[Issue #1442](https://github.com/koordinator-sh/koordinator/issues/1442)。 +在 v1.3.0 版本中,用户可以查看和提交 Mid-tier 的超卖资源,也可以通过以下 Prometheus metrics 来观测节点画像的趋势变化。 + +```bash +# 查看节点的CPU资源画像,reclaimable指标表示预测的可回收资源量,predictor对应不同的预测模型 +koordlet_node_predicted_resource_reclaimable{node="test-node", predictor="minPredictor", resource="cpu", unit="core"} +# 查看节点的内存资源画像,reclaimable指标表示预测的可回收资源量,predictor对应不同的预测模型 +koordlet_node_predicted_resource_reclaimable{node="test-node", predictor="minPredictor", resource="memory", unit="byte"} +``` + +```bash +$ kubectl get node test-node -o yaml +apiVersion: v1 +kind: Node +metadata: + name: test-node +status: + # ... + allocatable: + cpu: '32' + memory: 129636240Ki + pods: '110' + kubernetes.io/mid-cpu: '16000' # allocatable cpu milli-cores for Mid-tier pods + kubernetes.io/mid-memory: 64818120Ki # allocatable memory bytes for Mid-tier pods + capacity: + cpu: '32' + memory: 129636240Ki + pods: '110' + kubernetes.io/mid-cpu: '16000' + kubernetes.io/mid-memory: 64818120Ki +``` + +关于 Koordinator 节点画像的设计,详见[Design - Node Prediction](/docs/designs/node-prediction)。 + +### 其他功能 + +通过 [v1.3.0 Release](https://github.com/koordinator-sh/koordinator/releases/tag/v1.3.0) 页面,可以看到更多包含在 v1.3.0 版本的新增功能。 + +## 未来计划 + +在接下来的版本中,Koordinator 目前规划了以下功能: + +- 硬件拓扑感知调度,综合考虑节点 CPU、内存、GPU 等多个资源维度的拓扑关系,在集群范围内进行调度优化。 +- 提供节点可分配资源的放大机制。 +- NRI 资源管理模式的完善和增强。 + +更多信息,敬请关注 [Milestone v1.4.0](https://github.com/koordinator-sh/koordinator/milestone/12)。 + +## 结语 + +最后,Koordinator 是一个开放的社区,欢迎广大云原生爱好者们随时通过各种方式参与共建,无论您在云原生领域是初学乍到还是驾轻就熟,我们都非常期待听到您的声音! 
diff --git a/i18n/zh-Hans/docusaurus-plugin-content-blog/2024-01-15-release/index.md b/i18n/zh-Hans/docusaurus-plugin-content-blog/2024-01-15-release/index.md new file mode 100644 index 000000000..a9e744d0f --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-blog/2024-01-15-release/index.md @@ -0,0 +1,301 @@ +--- +slug: release-v1.4.0 +title: "Koordinator v1.4: 更多的计算负载类型和更灵活的资源管理机制" +authors: [ZiMengSheng] +tags: [release] +--- + +## 背景 + +Koordinator 作为一个积极发展的开源项目,自 2022 年 4 月发布 v0.1.0 版本以来,经历了多次迭代,持续为 Kubernetes 生态系统带来创新和增强。项目的核心是提供混部工作负载编排、混部资源调度、混部资源隔离和混部性能调优的综合解决方案,帮助用户优化容器性能,并提升集群资源使用效率。 + +在过去的版本迭代中,Koordinator 社区不断壮大,已经得到了包括阿里巴巴、蚂蚁科技、Intel、小米、小红书、爱奇艺、360、有赞、趣玩、美亚柏科、PITS 等知名企业工程师的积极参与和贡献。每一个版本都是在社区共同努力下推进的,反映了项目在实际生产环境中解决问题的能力。 + +今天我们很高兴的向大家宣布,Koordinator v1.4.0 版本正式发布。在本次发布中,Koordinator 引入了 Kubernetes 与 YARN 负载混部、NUMA 拓扑对齐策略、CPU 归一化和冷内存上报等新特性,同时重点增强了弹性配额管理、宿主机非容器化应用的 QoS 管理、重调度防护策略等领域的功能。这些新增和改进点旨在更好地支持企业级 Kubernetes 集群环境,特别是对于复杂和多样化的应用场景。 + +v1.4.0 版本的发布,将为用户带来更多的计算负载类型支持和更灵活的资源管理机制,我们期待这些改进能够帮助用户应对更多企业资源管理挑战。在 v1.4.0 版本中,共有 11 位新加入的开发者参与到了 Koordinator 社区的建设,他们是 @shaloulcy,@baowj-678,@zqzten,@tan90github,@pheianox,@zxh326,@qinfustu,@ikaven1024,@peiqiaoWang,@bogo-y,@xujihui1985,感谢期间各位社区同学的积极参与和贡献,也感谢所有同学在社区的持续投入。 + +## 版本功能特性解读 + +### 1. 支持 K8s 与 YARN 混部 + +Koordinator 已经支持了 K8s 生态内的在离线混部,然而在 K8s 生态外,仍有相当数量的大数据任务运行在传统的 Hadoop YARN 之上。YARN 作为发展多年的大数据生态下的资源管理系统,承载了包括 MapReduce、Spark、Flink 以及 Presto 等在内的多种计算引擎。 + +Koordinator 社区会同来自阿里云、小红书、蚂蚁金服的开发者们共同启动了 Hadoop YARN 与 K8s 混部项目 Koordinator YARN Copilot,支持将 Hadoop NodeManager 运行在 kubernetes 集群中,充分发挥不同类型负载错峰复用的技术价值。Koordinator YARN Copilot 具备以下特点: + +- 面向开源生态:基于 Hadoop YARN 开源版本,不涉及对 YARN 的侵入式改造; +- 统一资源优先级和 QoS 策略:YARN NM 使用 Koordinator 的 Batch 优先级资源,遵循 Koordinator QoS 管理策略; +- 节点级别的资源共享:Koordinator 提供的混部资源,既可被 K8s Pod 使用,也可被 YARN task使用,不同类型的离线应用可运行在同一节点。 + +![img](/img/hadoop-k8s.svg) + +关于 Koordinator YARN Copilot 的详细设计,以及在小红书生产环境的使用情况,请参考[往期文章](https://mp.weixin.qq.com/s/N0QEJYyOhoDZoVQ6hGhnmg)以及[社区官方文档](https://koordinator.sh/zh-Hans/docs/next/designs/koordinator-yarn)。 + +### 2. 引入 NUMA 拓扑对齐策略 + +运行在 Kubernetes 集群中的工作负载日益多样化。尤其是在机器学习等领域,对于高性能计算资源的需求持续上升。在这些领域中,不仅需要大量 CPU 资源,还经常需要 GPU 和 RDMA 等其他高速计算资源配合使用;并且,为了获得最佳的性能,这些资源往往需要在同一个 NUMA 节点,甚至同一个 PCIE 中。 + +Kubernetes 的 Kubelet 提供了 Topology Manager 来管理资源分配的 NUMA 拓扑,试图在 Kubelet 的 Admission 阶段从节点层面对齐多种资源的拓扑。然而,节点组件没有调度器的全局视角以及为 Pod 选择节点的时机,可能导致 Pod 被调度到无法满足拓扑对齐策略的节点上,从而导致 Pod 由于 `Topology Affinity`错误无法启动。 + +为了解决这一问题,Koordinator 将 NUMA 拓扑选择和对齐的时机放在中心调度器中,从集群级别优化资源之间的 NUMA 拓扑。在本次发布的版本中,Koordinator 将 CPU 资源(包含 Batch 资源)的 NUMA 感知调度和 GPU 设备的 NUMA 感知调度作为 alpha 功能支持,整套 NUMA 感知调度快速演进中。 + +koordinator 支持用户通过节点的 Label 配置节点上多种资源的 NUMA 拓扑对齐策略,可配置策略如下: + +- `None` 是默认策略,不执行任何拓扑对齐。 +- `BestEffort` 表示节点不严格按照 NUMA 拓扑对齐来分配资源。只要节点的剩余总量满足 Pods 的需求,调度器总是可以将这样的节点分配给 Pods。 +- `Restricted` 表示节点严格按照 NUMA 拓扑对齐来分配资源,即调度器在分配多个资源时必须只选择相同的一个或多个 NUMA 节点,否则不应使用该节点;可以使用多个 NUMA 节点。例如,如果一个Pod请求 33C,并且每个 NUMA 节点有 32C,那么它可以被分配使用两个 NUMA 节点。如果这个Pod还需要请求 GPU/RDMA,那么它需要位于与 CPU 相同的 NUMA 节点上。 +- `SingleNUMANode` 与 `Restricted` 类似,也是严格按照 NUMA 拓扑对齐,但与 `Restricted` 不同的是,`Restricted` 允许使用多个NUMA节点,而 `SingleNUMANode` 只允许使用一个NUMA 节点。 + +举例,我们可以为 `node-0`设置策略 `SingleNUMANode`: + +```yaml +apiVersion: v1 +kind: Node +metadata: + labels: + node.koordinator.sh/numa-topology-policy: "SingleNUMANode" + name: node-0 +spec: + ... 
+``` + +在生产环境中,用户可能已经开启了 Kubelet 的拓扑对齐策略,这个策略会由 koordlet 更新到 `NodeResourceTopology`CRD 对象中的 `TopologyPolicies`字段。当 Kubelet 的策略和用户在 Node 上设置的策略相冲突时,以 Kubelet 策略为准。Koordinator 调度器基本采用与 Kubelet Topology Manager 相同的 NUMA 对齐策略语义,Kubelet 策略 `SingleNUMANodePodLevel `和`SingleNUMANodeContainerLevel`被映射为 `SingleNUMANode`。 + +在为节点配置好 NUMA 对齐策略的前提下,调度器可以为每个 Pod 选出许多个符合条件的 NUMA Node 分配结果。Koordinator 当前支持 NodeNUMAResource 插件配置 CPU 和内存资源的 NUMA Node 分配结果打分策略,包括 `LeastAllocated`和 `MostAllocated`, 默认为 `LeastAllocated` 策略,资源支持配置权重。调度器最终将选择得分最高的 NUMA Node 分配结果。如下例,我们配置 NUMA Node 分配结果打分策略为 `MostAllocated`: + +```yaml +apiVersion: kubescheduler.config.k8s.io/v1beta2 +kind: KubeSchedulerConfiguration +profiles: + - pluginConfig: + - name: NodeNUMAResource + args: + apiVersion: kubescheduler.config.k8s.io/v1beta2 + kind: NodeNUMAResourceArgs + scoringStrategy: # Here configure Node level scoring strategy + type: MostAllocated + resources: + - name: cpu + weight: 1 + - name: memory + weight: 1 + - name: "kubernetes.io/batch-cpu" + weight: 1 + - name: "kubernetes.io/batch-memory" + weight: 1 + numaScoringStrategy: # Here configure NUMA-Node level scoring strategy + type: MostAllocated + resources: + - name: cpu + weight: 1 + - name: memory + weight: 1 + - name: "kubernetes.io/batch-cpu" + weight: 1 + - name: "kubernetes.io/batch-memory" + weight: 1 +``` + +### 3. ElasticQuota 再进化 + +为了充分地利用集群资源、降低管控系统成本,用户常常将多个租户的负载部署在一个集群中。在集群资源有限的情况下,不同租户之间必然会发生资源争抢。有的租户的负载可能一直被满足,而有的租户的负载一直无法得到执行。这就产生对公平性的诉求。配额机制是非常自然地保障租户间公平性的方式,给每个租户一个配额,租户可以使用配额内的资源,超过配额的任务将不被调度和执行。然而,简单的配额管理无法满足租户对云的弹性期待。用户希望除了配额之内的资源请求可以被满足外,配额之外的资源请求也可以按需地被满足。 + +在之前的版本中,Koordinator 复用了上游 ElasticQuota 的协议,允许租户设置 Min 表达其一定要满足的资源诉求,允许设置 Max 限制其最大可以使用的资源和表达在集群资源不足的情况下对集群剩余资源的使用权重。另外,koordinator 观察到,一些租户可能通过 Min 申请了配额,但是实际的任务申请可能并没有充分利用该配额。由此,为了更近一步地提高资源利用率,Koordinator 允许租户间借用/归还资源。 + +除了提供弹性的配额机制满足租户按需诉求外,Koordinator 在 ElasticQuota 上增加注解将其组织成树的结构,方便用户表达树形的组织架构。 + +![img](/img/quotatree1.jpg) + +上图是使用了 Koordinator 弹性配额的集群中常见的 Quota 结构树。Root Quota 是连接配额与集群中实际资源之间的桥梁。在之前的设计中,Root Quota 只在调度器逻辑中存在,在本次发布中,我们将 Root Quota 也通过 CRD 的形式暴露给用户,用户可以通过 koordinator-root-quota 这个 ElasticQuota CRD 查看 Root Quota 信息。 + +#### 3.1 引入 Multi QuotaTree + +大型集群中的节点的形态是多样的,例如云厂商提供的 ECS VM 会有不同的架构,常见的是 amd64 和 arm64,相同架构又会有不同种类的机型,而且一般会把节点按可用区划分。不同类型的节点放到同一个 Quota Tree 中管理时,其特有的属性将丢失,当用户希望精细化管理机器的特有属性时,当前的 ElasticQuota 显得不够精确。为了满足用户灵活的资源管理或资源隔离诉求,Koordinator 支持用户将集群中的资源划分为多份,每一份由一个 Quota Tree 来管理,如下图所示: + +![img](/img/multiquotatree.png) + +同时,为了帮助用户简化管理复杂性,Koordinator 在 v1.4.0 中 引入了 ElasticQuotaProfile 机制,用户可以通过 nodeSelector 快速的将节点关联到不同的 QuotaTree 中,如下实例所示: + +```yaml +apiVersion: quota.koordinator.sh/v1alpha1 +kind: ElasticQuotaProfile +metadata: + labels: + kubernetes.io/arch: amd64 + name: amd64-profile + namespace: kube-system +spec: + nodeSelector: + matchLabels: + kubernetes.io/arch: amd64 // 挑选 amd64 节点 + quotaName: amd64-root-quota // 匹配的 root quota 名称 +--- +apiVersion: quota.koordinator.sh/v1alpha1 +kind: ElasticQuotaProfile +metadata: + labels: + kubernetes.io/arch: arm64 + name: arm64-profile + namespace: kube-system +spec: + nodeSelector: + matchLabels: + kubernetes.io/arch: arm64 // 挑选 arm64 节点 + quotaName: arm64-root-quota // 匹配的 root quota 名称 +``` + +关联好 QuotaTree 之后,用户在每一个 QuotaTree 中与之前的 ElasticQuota 用法一致。当用户提交 Pod 到对应的 Quota 时,当前仍然需要用户完成 Pod NodeAffinity 的管理,以确保 Pod 运行在正确的节点上。未来,我们会增加一个特性帮助用户自动管理 Quota 到 Node 的映射关系。 + +#### 3.2 支持 non-preemptible + +Koordinator ElasticQuota 支持把 ElasticQuota 中 Min 未使用的部分共享给其他 ElasticQuota 使用从而提高资源利用效率,但当资源紧张时,会通过抢占机制把借用配额的 Pod 
抢占驱逐走拿回资源。 + +在实际生产环境中,有一些在线服务如果从其他 ElasticQuota 中借用了这部分额度,后续又发生了抢占,是可能影响服务质量的。这类工作负载实质上是不能被抢占的。 + +为了实现这个机制,Koordinator v1.4.0 引入了新的 API,用户只需要在 Pod 上声明 `quota.scheduling.koordinator.sh/preemptible: false `表示这个 Pod 不可以被抢占。 + +调度器调度时发现 Pod 声明了不可抢占,那么此类 Pod 的可用配额的上限不能超过 min,所以这里也需要注意的是,启用该能力时,一个 ElasticQuota 的 min 需要设置的合理,并且集群内有相应的资源保障。 + +这个特性不会破坏原有的行为。 + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example + namespace: default + labels: + quota.scheduling.koordinator.sh/name: "quota-example" + quota.scheduling.koordinator.sh/preemptible: false +spec: +... +``` + +#### 3.3 其它改进 + +1. Koordinator Scheduler 过去支持跨 Namespace 使用同一个 ElasticQuota 对象,但有一些场景下,希望只被一个或者多个有限的 Namespace 可以共享同一个对象,为了支持这个场景,用户可以在 ElasticQuota 上增加 annotation `quota.scheduling.koordinator.sh/namespaces`,对应的值为一个 JSON 字符串数组。 +2. 性能优化:过去的实现中,当 ElasticQuota 发生变化时,ElasticQuota 插件会重建整棵 Quota 树,在 v1.4.0 版本中做了优化。 +3. 支持忽略 Overhead:当 Pod 使用一些安全容器时,一般是在 Pod 中声明 Overhead 表示安全容器自身的资源开销,但这部分资源成本最终是否归于终端用户承担取决于资源售卖策略。当期望不用用户承担这部分成本时,那么就要求 ElaticQuota 忽略 overhead。在 v1.4.0 版本中,可以开启 featureGate `ElasticQuotaIgnorePodOverhead` 启用该功能。 + +### 4. CPU 归一化 + +随着 Kubernetes 集群中节点硬件的多样化,不同架构和代数的 CPU 之间性能差异显著。因此,即使 Pod 的 CPU 请求相同,实际获得的计算能力也可能大不相同,这可能导致资源浪费或应用性能下降。CPU 归一化的目标是通过标准化节点上可分配 CPU 的性能,来保证每个 CPU 单元在 Kubernetes 中提供的计算能力在异构节点间保持一致。 + +为了解决该问题,Koordinator 在 v1.4.0 版本中实现了一套支持 CPU 归一化机制,根据节点的资源放大策略,调整节点上可分配的 CPU 资源数量,使得集群中每个可分配的 CPU 通过缩放实现算力的基本一致。整体的架构如下图所示: + +![img](/img/cpu-normalization.svg) + +CPU 归一化分为两个步骤: + +1. CPU 性能评估,计算不同 CPU 的性能基准,可以参考工业级性能评测标准 [SPEC CPU](https://www.spec.org/cpu2017/),这部分 Koordinator 项目未提供; +2. 配置 CPU 归一化系数到 Koordinator,调度系统基于归一化系数来调度资源,这部分 Koordinator 提供; + +将 CPU 归一化比例信息配置到 koord-manager 的 slo-controller-config 中,配置示例如下: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: koordinator-system +data: + cpu-normalization-config: | + { + "enable": true, + "ratioModel": { + "Intel(R) Xeon(R) Platinum 8269CY CPU @ 2.50GHz": { + "baseRatio": 1.29, + "hyperThreadEnabledRatio": 0.82, + "turboEnabledRatio": 1.52, + "hyperThreadTurboEnabledRatio": 1.0 + }, + "Intel Xeon Platinum 8369B CPU @ 2.90GHz": { + "baseRatio": 1.69, + "hyperThreadEnabledRatio": 1.06, + "turboEnabledRatio": 1.91, + "hyperThreadTurboEnabledRatio": 1.20 + } + } + } + # ... +``` + +对于配置了 CPU 归一化的节点,Koordinator 通过 Webhook 拦截 Kubelet 对 Node.Status.Allocatable 的更新以实现 CPU 资源的缩放,最终在节点上呈现出归一后的 CPU 资源可分配量。 + +### 5. 改进的重调度防护策略 + +Pod 迁移是一个复杂的过程,涉及审计、资源分配、应用启动等步骤,并且与应用升级、扩展场景以及集群管理员的资源操作和维护操作混合在一起。因此,如果同时有大量 Pods 正在进行迁移,可能会对系统的稳定性产生影响。此外,如果同一工作负载的许多Pods同时被迁移,也会影响应用的稳定性。此外,如果同时迁移多个作业中的 Pods,可能会造成惊群效应。因此,我们希望顺序处理每个作业中的 Pods。 + +Koordinator 在之前提供的 PodMigrationJob 功能中已经提供了一些防护策略来解决上述问题。在 v1.4.0 版本中,Koordinator 将之前的防护策略增强为仲裁机制。当有大量的 PodMigrationJob 可以被执行时,由仲裁器通过排序和筛选,来决定哪些 PodMigrationJob 可以得到执行。 + +排序过程如下: + +- 根据迁移开始时间与当前时间的间隔进行排序,间隔越小,排名越高。 +- 根据 PodMigrationJob 的 Pod 优先级进行排序,优先级越低,排名越高。 +- 按照工作负载分散 Jobs,使得同一作业中的 PodMigrationJobs 靠近。 +- 如果作业中已有 Pods 正在迁移,则该 PodMigrationJob 的排名更高。 + +筛选过程如下: + +- 根据工作负载、节点、命名空间等对 PodMigrationJob 进行分组和筛选。 +- 检查每个工作负载中正在运行状态的 PodMigrationJob 数量,达到一定阈值的将被排除。 +- 检查每个工作负载中不可用副本的数量是否超出了最大不可用副本数,超出的将被排除。 +- 检查目标 Pod 所在节点上正在迁移的 Pod 数量是否超过单个节点的最大迁移量,超出的将被排除。 + +### 6. 冷内存上报 + +为提升系统性能,内核一般尽可能不让应用程序请求的页面缓存空闲,而是尽可能将其分配给应用程序。虽然内核分配了这些内存,但是应用可能不再访问,这些内存被称为冷内存。 + +Koordinator 在 1.4 版本中引入冷内存上报功能,主要为未来冷内存回收功能打下基础。冷内存回收主要用于应对两个场景: + +1. 对于标准的 Kubernetes 集群,当节点内存水位过高时,突发的内存请求容器导致系统直接内存回收,操作系统的直接内存回收触发时会影响已经运行容器的性能,如果回收不及时极端场景可能触发整机 oom。保持节点内存资源的相对空闲,对提升运行时稳定性至关重要 +2. 
在混部场景中,高优先级应用程序请求但未使用的资源可以被低优先级应用程序回收利用。对内存而言,操作系统未回收的内存,是不能被 Koordinator 调度系统看到的。为了提高混部资源效率,回收容器未使用的内存页面可以提高整机的资源利用效率 + +Koordlet 在 Collector Plugins 中添加了一个冷页面回收器,用于读取由 kidled(Anolis 内核)、kstaled(Google)或 DAMON(Amazon)导出的 cgroup 文件 memory.idle_stat。该文件包含页面缓存中的冷页面信息,并存在于 memory 的每个层次结构中。目前 koordlet 已经对接了 kidled 冷页面收集器并提供了其他冷页面收集器接口。 + +在收集冷页面信息后,冷页面回收器将把收集到的指标(例如节点、Pod 和容器的热页面使用量和冷页面大小)存到 metriccache 中,最后该数据会被上报到 NodeMetric CRD 中。 + +用户可以通过 NodeMetric 启用冷内存回收和配置冷内存收集策略,当前提供了 usageWithHotPageCache、usageWithoutPageCache 和 usageWithPageCache 三种策略,更多的细节详见社区[设计文档](https://github.com/koordinator-sh/koordinator/blob/main/docs/proposals/koordlet/20230728-support-cold-memory-compute.md)。 + +### 7. 非容器化应用的 QoS 管理 + +在企业容器化过程中,除了已经运行在 K8s 上的应用,可能还会存在一些非容器化的应用运行在主机上。为了更好兼容企业在容器化过程这一过渡态,Koordinator 开发了节点资源预留机制,可以未尚未容器化的应用预留资源并赋予特定的 QoS 特性。与 Kubelet 提供的资源预留配置不同,Koordinator 主要目标是解决这些非容器化应用与容器化应用运行时的 QoS 问题,整体的方案如下图所示: + +![img](/img/host-application.svg) + +目前,应用程序需要按照规范将进程启动到对应的 cgroup 中,Koordinator 未实现自动的 cgroup 搬迁工具。针对宿主机非容器化应用,支持 QoS 如下: + +- LS (Latency Sensitive) + +- - CPU QoS(Group Identity):应用按照规范将进程运行在 cgroup 的 cpu 子系统中,koordlet 根据 CPU QoS 的配置 resource-qos-config 为其设置 Group Identity 参数; + - CPUSet Allocation:应用按照规范将进程运行在 cgroup 的 cpu 子系统中,koordlet 将为其设置 cpu share pool 中的所有 CPU 核心。 + +- BE (Best-effort) + +- - CPU QoS(Group Identity):应用按照规范将进程运行在 cgroup 的 cpu 子系统中,koordlet 根据 CPU QoS 的配置为其设置 Group Identity 参数。 + +关于宿主机应用 QoS 管理的详细设计,可以参考[社区文档](https://koordinator.sh/zh-Hans/docs/next/user-manuals/host-application-qos),后续我们将陆续增加其他QoS策略对宿主机应用的支持。 + +### 8. 其它特性 + +除了上述新特性和功能增强外,Koordinator 在 v1.4.0 版本还做了一些如下的 bugfix 和优化: + +1. RequiredCPUBindPolicy:精细化 CPU 编排支持 Required 的 CPU 绑定策略配置,表示严格按照指定的 CPU 绑定策略分配 CPU,否则调度失败。 +2. CICD:Koordinator 社区在 v1.4.0 提供了一套 e2e 测试的 Pipeline;提供了 ARM64 镜像。 +3. Batch 资源计算策略优化:支持了 maxUsageRequest 的计算策略,用于更保守地超卖高优资源;优化了节点上短时间大量 Pod 启停时,Batch allocatable 被低估的问题;完善了对 hostApplication、thirdparty allocatable、dangling pod used 等特殊情况的考虑。 +4. 
其它:利用 libpfm4&perf group 优化 CPI 采集、SystemResourceCollector 支持自定义的过期时间配置、BE Pod 支持根据 evictByAllocatable 策略计算CPU 满足度、Koordlet CPUSetAllocator 修复了对于 LS 和 None Qos 的 Pod 的过滤逻辑、RDT 资源控制支持获得 sandbox 容器的 task IDs 等 + +通过 [v1.4.0 Release](https://github.com/koordinator-sh/koordinator/releases/tag/v1.4.0) 页面,可以看到更多包含在 v1.4.0 版本的新增功能。 + +## 未来计划 + +在接下来的版本中,Koordinator 目前规划了以下功能: + +- Core Scheduling。在运行时侧,Koordinator 开始探索下一代 CPU QoS 能力,通过利用 Linux Core Scheduling 等内核机制,增强的物理核维度的资源隔离,降低混部的安全性风险,相关工作详见 [Issue #1728](https://github.com/koordinator-sh/koordinator/issues/1728)。 +- 设备联合分配。在 AI 大模型分布式训练场景中,不同机器 GPU 之间通常需要通过高性能网卡相互通信,且 GPU 和高性能网卡就近分配的时候性能更好。Koordinator 正在推进支持多种异构资源的联合分配,目前已经在协议上和调度器分配逻辑上支持联合分配;单机侧关于网卡资源的上报逻辑正在探索中。 + +更多信息,敬请关注 [Milestone v1.5.0](https://github.com/koordinator-sh/koordinator/milestone/14)。 + +## 结语 + +最后,我们十分感谢 Koordinator 社区的所有贡献者和用户,是您们的积极参与和宝贵意见让 Koordinator 不断进步。我们期待您继续提供反馈,并欢迎新的贡献者加入我们的行列。 diff --git a/i18n/zh-Hans/docusaurus-plugin-content-blog/anolis-CPU-Co-location/index.md b/i18n/zh-Hans/docusaurus-plugin-content-blog/anolis-CPU-Co-location/index.md new file mode 100644 index 000000000..3f44dd899 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-blog/anolis-CPU-Co-location/index.md @@ -0,0 +1,65 @@ +--- +title: "龙蜥 plugsched 神器助力 Koordinator 云原生单机混部—— 内核 CPU QoS 揭秘" +authors: Dengerwei +--- + +## 什么是 CPU 混部 + +CPU 混部是指将不同类型的业务部署到同一台机器上运行,让它们共享机器上的 CPU 资源以提升 CPU 利用率,从而降低机器的采购和运营成本。但是,对于有些类型的任务来说,它们对延时非常的敏感,比如电商、搜索或 web 服务等,这类任务的实时性很高,但是通常对资源的消耗却不是很多,我们称之为在线任务;还有一类任务,它们更多的关注计算或者批处理,对延时没有要求,但是消耗的资源相对较多,我们称之为离线任务。 + +当这两类任务同时部署到同一台机器上时,由于离线任务对资源的占用较多,资源竞争导致在线任务的延时受到了很大的影响,而且,在超线程架构的机器上,即使离线任务和在线任务跑在不同的超线程 CPU 上,流水线和 cache 的竞争也会导致在线任务的运行受到影响。于是,CPU 混部技术诞生了,来解决离线任务对在线任务延时的影响,同时还能进一步提升 CPU 资源的利用率。 +

+图1 单机混部 CPU 利用率示意图
+ + +## 内核 CPU 混部技术 + +CPU 混部技术,主要是通过单机操作系统调度器来实现的,通过任务类型来决定所分配到的 CPU 资源。Koordinator 社区主要使用的单机操作系统发行版有 Alibaba Cloud Linux 2/3(简称 Alinux2/3) 和 CentOS7.9。对于 Alinux2/3,它使用的是龙蜥社区的 Group Identity CPU 混部技术,在操作系统内核中提供了 CPU 混部能力。Group Identity 在原有的 CFS 调度器中新增了另一个运行队列来区分在线和离线任务,而且,为了避免对端 CPU(超线程架构)上离线任务的干扰,Group Identity 会对其进行驱逐。龙蜥的 Group Identity 技术已经经过阿里双十一等大型活动以及大规模商业化的验证,其 CPU 混部能力也得到广大用户和开发者的认可。 + +但是对于 CentOS 发行版来说,到目前为止还没有提供任何 CPU 混部相关的技术和能力。对于 CentOS CPU 混部能力的缺失,可能有以下几种解决方案: +* 制作 CentOS 的衍生版系统,并包含 CPU 混部技术; +* 迁移到 Alibaba Cloud Linux 2/3 操作系统发行版; + +对于第一种方案,需要从 CentOS 镜像站中下载其内核源码,将 CPU 混部技术移植到内核,编译后安装,然后重启系统便可以使用该技术,但这会涉及到业务迁移和停机,势必会给业务方带来昂贵的代价。 +对于第二种方案,虽然迁移工作会有一定的工作量,但是,Alinux2/3 或 Anolis OS 包含了完整的混部资源隔离方案(CPU 混部仅仅是其中一点),技术红利所带来的收益远比迁移代价要大得多。而且 CentOS 即将停服,为了解决 CentOS 停服问题,龙蜥社区推出了 Anolis OS 发行版操作系统,该发行版系统完全兼容 CentOS,用户可以进行无缝迁移。 + +## 龙蜥 CPU 混部插件 + +针对 Koordinator 云原生 CentOS 单机操作系统 CPU 混部能力的缺失,龙蜥社区开发人员给出了另一种方案,利用 plugsched 调度器热升级技术提供一种 CPU 混部技术的调度器插件包,该插件包含了阿里云早期(2017年)的 CPU 混部技术 bvt + noise clean,该技术采用的是 throttle 机制,当调度器选择下一个任务时,它会检测对端 CPU 上的任务类型以及当前 CPU 正在执行的任务类型,如果在、离线任务同时存在,则会将离线任务 throttle 掉,然后继续选择下一个任务进行调度,保证在线任务优先执行且不被对端 CPU 上的离线干扰。该 CPU 混部调度器插件可直接安装到 CentOS7.9,不需要停机和业务迁移等工作。 + +### Plugsched SDK 神器 + +Plugsched 调度器热升级,是龙蜥社区推出的 plugsched SDK 调度器热升级开发工具,它可从 Linux 内核中将调度器解耦,形成一个独立的模块,然后将 CPU 混部技术移植到调度器模块,形成一个调度器插件,然后将其直接安装到运行的系统中就可以使用 CPU 混部技术。Plugsched,可以对内核调度器特性动态的进行增、删、改,来满足业务的需求,且无需进行业务迁移和停机升级,还可以回滚。内核开发人员可通过 plugsched SDK 生产出各种类型的调度器插件来满足不同的业务场景。 + +Plugsched 调度器热升级论文《Efficient Scheduler Live Update for Linux Kernel with Modularization》已被 ASPLOS 顶会收录,里面详细介绍了 plugsched 技术原理和应用价值,以及全面的测试和评估。目前,plugsched 生产的插件已在蚂蚁集团、阿里云和国内某大型互联网企业规模部署。 + +Plugsched 开源链接:[https://gitee.com/anolis/plugsched](https://gitee.com/anolis/plugsched) + +### CPU 混部插件测试 + +开发人员对该调度器插件进行了 CPU 混部的测试,服务端配置: +* 测试机器:阿里云神龙裸金属服务器,104 CPU,384 GB 内存 +* 系统配置:CentOS 7.9 发行版,内核版本 3.10,安装 CPU 混部调度器插件 +* 测试内容:在线任务是 Nginx 服务,容器配置为 80C 10GB,Nginx workers 数量为 80;离线任务是 ffmpeg 视频转码,容器配置为 50C 20GB,线程数量为 50。 +* 测试case: + * 基线:单独启动 Nginx 容器 + * 对照组:同时启动 Nginx 容器和 ffmpeg 容器,但不设置优先级(不启用混部功能) + * 实验组:同时启动 Nginx 容器和 ffmpeg 容器,给 Nginx 设置在线高优先级,ffmpeg 为离线低优先级(启用混部功能) + +在另一台压测机上使用 wrk 工具向 Nginx 服务发起请求,结果如下:(单位:ms) + +| | 基线 | 对照组 | 实验组 | +| --- | --- | --- | --- | +| RT-P50 | 0.223 | 0.245(+9.86%) | 0.224(+0.44%) | +| RT-P75 | 0.322 | 0.387(+20.18%) | 0.338(+4.96%) | +| RT-P90 | 0.444 | 0.575(+29.50) | 0.504(+13.51%) | +| RT-P99 | 0.706 | 1.7(+140.79) | 0.88(+24.64%) | +| CPU% | 25.15% | 71.7% | 49.15% | + +从上面的结果来看,没有 CPU 混部插件,离线任务对在线任务的影响很大,P99 延时增长了一倍多,而安装 CPU 混部插件后,P99 长尾延时的影响显著降低,CPU 利用率也接近50%。 + +该插件虽然能显著降低离线对在线任务的干扰,但还是逊色于龙蜥社区的 Group Identity 技术。龙蜥的 Group Identity 技术能让在线受到的干扰小于 5%,而且整机利用率的提升也比该插件要更多一些,达到 60% 以上(可查阅:[koordinator 混部最佳实践手册](https://help.aliyun.com/document_detail/450006.html))。这些差异的原因在于,1)内核自身的差异,CentOS 7.9 使用的是比较早的 3.10 内核,而龙蜥使用的是 4.19/5.10 内核,3.10 内核调度器性能本身就不及 4.19/5.10;2)Group Identity 的实现原理相比 noise clean 更适合 CPU 混部场景。 + +## 结语 + +最后,欢迎广大技术人员、开源爱好者和读者用户加入 Koordinator、openanolis 社区,享受社区带来的技术,不论是 Group Identity 还是 Plugsched 神器,一定会给大家带来意想不到的收益和价值,欢迎大家共建社区,与社区共同交流、成长和发展。 diff --git a/i18n/zh-Hans/docusaurus-plugin-content-blog/authors.yml b/i18n/zh-Hans/docusaurus-plugin-content-blog/authors.yml new file mode 100644 index 000000000..820946ebe --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-blog/authors.yml @@ -0,0 +1,47 @@ +hormes: + name: Fangsong Zeng + title: Koordinator maintainer + url: https://github.com/hormes + image_url: https://github.com/hormes.png + +joseph: + name: Joseph + title: 
Koordinator maintainer + url: https://github.com/eahydra + image_url: https://github.com/eahydra.png + +jason: + name: Jason + title: Koordinator maintainer + url: https://github.com/jasonliu747 + image_url: https://github.com/jasonliu747.png + +FillZpp: + name: Siyu Wang + title: Koordinator maintainer + url: https://github.com/FillZpp + image_url: https://github.com/FillZpp.png + +Dengerwei: + name: Erwei Deng + title: Openanolis developer + url: https://github.com/Dengerwei + image_url: https://github.com/Dengerwei.png + +zwzhang0107: + name: Zuowei Zhang + title: Koordinator maintainer + url: https://github.com/zwzhang0107 + image_url: https://github.com/zwzhang0107.png + +saintube: + name: Rougang Han + title: Koordinator member + url: https://github.com/saintube + image_url: https://github.com/saintube.png + +ZiMengSheng: + name: Jianyu Wang + title: Koordinator member + url: https://github.com/ZiMengSheng + image_url: https://github.com/ZiMengSheng.png diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/current/installation.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/current/installation.md index 1279a5659..b4b197e15 100644 --- a/i18n/zh-Hans/docusaurus-plugin-content-docs/current/installation.md +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/current/installation.md @@ -20,7 +20,7 @@ $ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/ $ helm repo update # Install the latest version. -$ helm install koordinator koordinator-sh/koordinator --version 1.3.0 +$ helm install koordinator koordinator-sh/koordinator --version 1.4.0 ``` ## 使用 Helm 升级 @@ -33,7 +33,7 @@ $ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/ $ helm repo update # Upgrade the latest version. -$ helm upgrade koordinator koordinator-sh/koordinator --version 1.3.0 [--force] +$ helm upgrade koordinator koordinator-sh/koordinator --version 1.4.0 [--force] ``` 注意: @@ -80,7 +80,7 @@ NRI 资源管理模式是*默认启用*的。你无需修改 koordlet 配置就 | `manager.log.level` | Log level that koord-manager printed | `4` | | `manager.replicas` | Replicas of koord-manager deployment | `2` | | `manager.image.repository` | Repository for koord-manager image | `koordinatorsh/koord-manager` | -| `manager.image.tag` | Tag for koord-manager image | `v1.3.0` | +| `manager.image.tag` | Tag for koord-manager image | `v1.4.0` | | `manager.resources.limits.cpu` | CPU resource limit of koord-manager container | `1000m` | | `manager.resources.limits.memory` | Memory resource limit of koord-manager container | `1Gi` | | `manager.resources.requests.cpu` | CPU resource request of koord-manager container | `500m` | @@ -95,7 +95,7 @@ NRI 资源管理模式是*默认启用*的。你无需修改 koordlet 配置就 | `scheduler.log.level` | Log level that koord-scheduler printed | `4` | | `scheduler.replicas` | Replicas of koord-scheduler deployment | `2` | | `scheduler.image.repository` | Repository for koord-scheduler image | `koordinatorsh/koord-scheduler` | -| `scheduler.image.tag` | Tag for koord-scheduler image | `v1.3.0` | +| `scheduler.image.tag` | Tag for koord-scheduler image | `v1.4.0` | | `scheduler.resources.limits.cpu` | CPU resource limit of koord-scheduler container | `1000m` | | `scheduler.resources.limits.memory` | Memory resource limit of koord-scheduler container | `1Gi` | | `scheduler.resources.requests.cpu` | CPU resource request of koord-scheduler container | `500m` | @@ -107,7 +107,7 @@ NRI 资源管理模式是*默认启用*的。你无需修改 koordlet 配置就 | `scheduler.hostNetwork` | Whether koord-scheduler pod should run with hostnetwork | `false` | | `koordlet.log.level` | Log 
level that koordlet printed | `4` | | `koordlet.image.repository` | Repository for koordlet image | `koordinatorsh/koordlet` | -| `koordlet.image.tag` | Tag for koordlet image | `v1.3.0` | +| `koordlet.image.tag` | Tag for koordlet image | `v1.4.0` | | `koordlet.resources.limits.cpu` | CPU resource limit of koordlet container | `500m` | | `koordlet.resources.limits.memory` | Memory resource limit of koordlet container | `256Mi` | | `koordlet.resources.requests.cpu` | CPU resource request of koordlet container | `0` | diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4.json b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4.json new file mode 100644 index 000000000..6057e117d --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4.json @@ -0,0 +1,22 @@ +{ + "sidebar.docs.category.Getting Started": { + "message": "快速开始", + "description": "快速开始" + }, + "sidebar.docs.category.Architecture": { + "message": "架构", + "description": "The label for category Architecture in sidebar docs" + }, + "sidebar.docs.category.User Manuals": { + "message": "用户手册", + "description": "The label for category User Manuals in sidebar docs" + }, + "sidebar.docs.category.Design Details": { + "message": "设计", + "description": "The label for category Design Details in sidebar docs" + }, + "sidebar.docs.category.Best Practices": { + "message": "最佳实践", + "description": "The label for category Best Practices in sidebar docs" + } +} diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/architecture/overview.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/architecture/overview.md new file mode 100644 index 000000000..8e62edf0e --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/architecture/overview.md @@ -0,0 +1,60 @@ +# 概述 + +本节描述了 Koordinator 部署到 Kubernetes 集群相关的架构、组件和核心概念。Koordinator 由两个控制面([Koordinator Scheduler](#koordinator-scheduler)/[Koordinator Manager](#koordinator-manager))和一个 DaemonSet 组件([Koordlet](#koordlet))组成。 +Koordinator 在 Kubernetes 原有的能力基础上增加了混部功能,并兼容了 Kubernetes 原有的工作负载。 + +![架构](/img/architecture.png) + +## Koord-Scheduler + +Koord-Scheduler 以 Deployment 的形式部署在集群中,用于增强 Kubernetes 在 QoS-aware,差异化 SLO 以及任务调度场景的资源调度能力,具体包括: + +- QoS-aware 调度,包括负载感知调度让节点间负载更佳平衡,资源超卖的方式支持运行更多的低优先级工作负载。 +- 差异化 SLO,包括 CPU 精细化编排,为不同的工作负载提供不同的 QoS 隔离策略(cfs,LLC,memory 带宽,网络带宽,磁盘io)。 +- 任务调度,包括弹性额度管理,Gang 调度,异构资源调度等,以支持更好的运行大数据和 AI 工作负载。 + +为了更好的支持不同类型的工作负载,Koord-scheduler 还包括了一些通用性的能力增强: + +- Reservation,支持为特定的 Pod 或者工作负载预留节点资源。资源预留特性广泛应用于重调度,资源抢占以及节点碎片整理等相关优化过程。 +- Node Reservation,支持为 kubernetes 之外的工作负载预留节点资源,一般应用于节点上运行着非容器化的负载场景。 + +## Koord-Descheduler + +Koord-Decheduler 以 Deployment 的形式部署在集群中,它是 kubernetes 上游社区的增强版本,当前包含: + +- 重调度框架, Koord-Decheduler 重新设计了全新重调度框架,在可扩展性、资源确定性以及安全性上增加了诸多的加强,更多的[细节](../designs/descheduler-framework). +- 负载感知重调度,基于新框架实现的一个负载感知重调度插件,支持用户配置节点的安全水位,以驱动重调度器持续优化集群编排,从而规避集群中出现局部节点热点. 
+ +## Koord-Manager + +Koord-Manager 以 Deployment 的形式部署,通常由两个实例组成,一个 leader 实例和一个 backup 实例。Koordinator Manager 由几个控制器和 webhooks 组成,用于协调混部场景下的工作负载,资源超卖(resource overcommitment)和 SLO 管理。 + +目前,提供了三个组件: + +- Colocation Profile,用于支持混部而不需要修改工作负载。用户只需要在集群中做少量的配置,原来的工作负载就可以在混部模式下运行,了解更多关于[Colocation Profile](../user-manuals/colocation-profile.md)。 +- SLO 控制器,用于资源超卖(resource overcommitment)管理,根据节点混部时的运行状态,动态调整集群的超发(overcommit)配置比例。该控制器的核心职责是管理混部时的 SLO,如智能识别出集群中的异常节点并降低其权重,动态调整混部时的水位和压力策略,从而保证集群中 pod 的稳定性和吞吐量。 +- Recommender(即将推出),它使用 histograms 来统计和预测工作负载的资源使用细节,用来预估工作负载的峰值资源需求,从而支持更好地分散热点,提高混部的效率。此外,资源 profiling 还将用于简化用户资源规范化配置的复杂性,如支持 VPA。 + +## Koordlet + +Koordlet 以 DaemonSet 的形式部署在 Kubernetes 集群中,用于支持混部场景下的资源超卖(resource overcommitment)、干扰检测、QoS 保证等。 + +在Koordlet内部,它主要包括以下模块: + +- 资源 Profiling,估算 Pod 资源的实际使用情况,回收已分配但未使用的资源,用于低优先级 Pod 的 overcommit。 +- 资源隔离,为不同类型的 Pod 设置资源隔离参数,避免低优先级的 Pod 影响高优先级 Pod 的稳定性和性能。 +- 干扰检测,对于运行中的 Pod,动态检测资源争夺,包括 CPU 调度、内存分配延迟、网络、磁盘 IO 延迟等。 +- QoS 管理器,根据资源剖析、干扰检测结果和 SLO 配置,动态调整混部节点的水位,抑制影响服务质量的 Pod。 +- 资源调优,针对混部场景进行容器资源调优,优化容器的 CPU Throttle、OOM 等,提高服务运行质量。 + +## Koord-RuntimeProxy + +Koord-RuntimeProxy 以 systemd service 的形式部署在 Kubernetes 集群的节点上,用于代理 Kubelet 与 containerd/docker 之间的 CRI 请求。这一个代理被设计来支持精细化的资源管理策略,比如为不同 QoS Pod 设置不同的 cgroup 参数,包括内核 cfs quota,resctl 等等技术特性,以改进 Pod 的运行时质量。。 + +## 下一步 + +以下是推荐下一步阅读的内容: + +- 学习 Koordinator 的[资源模型](./resource-model)。 +- 学习 Koordinator 的[Priority](./priority)。 +- 学习 Koordinator 的[QoS](./qos)。 diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/architecture/priority.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/architecture/priority.md new file mode 100644 index 000000000..3ed09e6aa --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/architecture/priority.md @@ -0,0 +1,88 @@ +# 优先级 + +Koordinator 在 Kubernetes 优先级类型的基础上定义了一套规范,并扩展了优先级的一个维度以对混部场景的细粒度支持。 + +## 定义 + +优先级用数字表示,目前定义了四个类: + +PriorityClass|优先级范围|描述 +----- | ----------- | -------- +koord-prod | [9000, 9999] | 需要提前规划资源配额,并且保证在配额内成功。 +koord-mid | [7000, 7999] | 需要提前规划资源配额,并且保证在配额内成功。 +koord-batch | [5000, 5999] | 需要提前规划资源配额,一般允许借用配额。 +koord-free | [3000, 3999] | 不保证资源配额,可分配的资源总量取决于集群的总闲置资源。 + +PriorityClass 目前留有一些暂未使用的区间,以支持未来可能的扩展。 + +## 约束 + +Koordinator 将不同类型的工作负载匹配到不同的优先级: + +- koord-prod,运行典型的延迟敏感型服务,一般是指需要 "实时 "响应的服务类型,比如通过点击移动APP中的按钮调用的典型服务。 +- koord-mid,对应于长周期的可用资源,一般用于运行一些实时计算、人工智能训练任务/作业,如 tensorflow/pytorch 等。 +- koord-batch,对应于的短周期可用资源,运行典型的离线批处理作业,一般指离线分析类作业,如日级大数据报告、非交互式 SQL 查询。 +- koord-free,运行低优先级的离线批处理作业,一般指不做资源预算,利用闲置资源尽量完成,如开发人员为测试目提交的作业。 + +## Koordinator 优先级与 Kubernetes优先级的对比 + +Koordinator 在 Kubernetes 集群中部署时会初始化这四个 PriorityClass。 + +``` +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: koord-prod +value: 9000 +description: "This priority class should be used for prod service pods only." +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: koord-mid +value: 7000 +description: "This priority class should be used for mid service pods only." +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: koord-batch +value: 5000 +description: "This priority class should be used for batch service pods only." +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: koord-free +value: 3000 +description: "This priority class should be used for free service pods only." 
+``` + +在每个 PriorityClass 内,Koordinator 允许用户为精细化资源调度设置混部 Pod 的优先级。 + +## 示例 + +下面的 YAML 是一个 Pod 配置的例子,它使用了前面例子中创建的 PriorityClass 和优先级。 + +``` +apiVersion: v1 +kind: Pod +metadata: + name: nginx + labels: + env: test + koordinator.sh/priority: "5300" +spec: + containers: + - name: nginx + image: nginx + imagePullPolicy: IfNotPresent + priorityClassName: koord-batch +``` + +## 下一步是什么 + +以下是推荐下一步阅读的内容: + +- 学习 Koordinator 的[资源模型](./resource-model)。 +- 学习 Koordinator 的[QoS](./qos)。 diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/architecture/qos.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/architecture/qos.md new file mode 100644 index 000000000..057b565a1 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/architecture/qos.md @@ -0,0 +1,35 @@ +# QoS + +QoS 用于表达节点上 Pod 的运行质量,如获取资源的方式、获取资源的比例、QoS 保障策略等。 + +## 定义 + +Koordinator 调度系统支持的 QoS 有五种类型: + +QoS | 特点 | 说明 +--- | ---- | ------------- +SYSTEM | 系统进程,资源受限 | 对于 DaemonSets 等系统服务,虽然需要保证系统服务的延迟,但也需要限制节点上这些系统服务容器的资源使用,以确保其不占用过多的资源 +LSE(Latency Sensitive Exclusive) | 保留资源并组织同 QoS 的 pod 共享资源 | 很少使用,常见于中间件类应用,一般在独立的资源池中使用 +LSR(Latency Sensitive Reserved) | 预留资源以获得更好的确定性 | 类似于社区的 Guaranteed,CPU 核被绑定 +LS(Latency Sensitive) | 共享资源,对突发流量有更好的弹性 | 微服务工作负载的典型QoS级别,实现更好的资源弹性和更灵活的资源调整能力 +BE(Best Effort) | 共享不包括 LSE 的资源,资源运行质量有限,甚至在极端情况下被杀死 | 批量作业的典型 QoS 水平,在一定时期内稳定的计算吞吐量,低成本资源 + +## QoS CPU 编排隔离与共享 + +![img](/img/qos-cpu-orchestration.png) + +## Koordinator QoS与 Kubernetes QoS 的对比 + +从[定义](#定义)部分可以看出,Koordinator 的 QoS 比 Kubernetes 的 QoS 更复杂,因为在混部场景下,我们需要对延迟敏感的工作负载的 QoS 进行微调,以满足混部时性能的需求。 + +Koordinator 和 Kubernetes QoS 之间是有对应关系的: + +Koordinator QoS | Kubernetes QoS +--------------- | -------------- +SYSTEM | --- +LSE | Guaranteed +LSR | Guaranteed +LS | Guaranteed/Burstable +BE | BestEffort + +Koordlet 根据 Pod 的优先级和 QoS 定义,触发相应的资源隔离和 QoS 保障。 diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/architecture/resource-model.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/architecture/resource-model.md new file mode 100644 index 000000000..099ce0607 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/architecture/resource-model.md @@ -0,0 +1,37 @@ +# 资源模型 + +混部是一套资源调度解决方案,用于对延迟敏感的工作负载与大数据计算工作负载进行精细化编排。它需要解决两个主要问题: + +1. 如何为延迟敏感的工作负载调度资源,以满足性能和长尾延迟的要求。**这里涉及到的**关键点是资源调度策略和 QoS 感知策略。 +2. 如何调度和编排大数据计算工作负载,以较低的成本满足任务对计算资源的需求。**这里涉及到的**关键是如何在极端异常情况下实现合理的资源超额配置和 QoS 保障。 + +## 定义 + +![Resource Model](/img/resource-model.png) + +上图是 Koordinator 的混部资源模型,其基本思想是利用那些已分配但未使用的资源来运行低优先级的 pod。如图所示,有四条线: + +1. limit:灰色,高优先级 Pod 所请求的资源量,对应于 Kubernetes 的 Pod 请求。 +2. usage:红色,Pod 实际使用的资源量,横轴为时间线,红线为 Pod 负载随时间变化的波动曲线。 +3. short-term reservation:深蓝色,这是基于过去(较短)时期内的资源使用量,对未来一段时间内其资源使用量的估计。预留和限制的区别在于,分配的未使用(未来不会使用的资源)可以用来运行短期执行的批处理 Pod。 +4. 
long-term reservation:浅蓝色,与 short-term reservation 类似,但估计的历史使用期更长。从保留到限制的资源可以用于生命周期较长的Pod,与短期的预测值相比,可用的资源较少,但更稳定。 + +整个混部资源调度是基于上图所示的资源模型构建的,不仅可以满足各种工作负载的资源需求,还可以充分利用集群的闲置资源。 + +## SLO描述 + +在集群中运行的 Pod 资源 SLO 由两个概念组成,即优先级和 QoS。 + +* 优先级,即资源的优先级,代表了请求资源被调度的优先级。通常情况下,优先级会影响 Pod 在调度器待定队列中的相对位置。 + +* QoS,代表 Pod 运行时的服务质量。如cgroups cpu share、cfs 配额、LLC、内存、OOM 优先级等等。 + +需要注意的是,Priority 和 QoS 是两个维度的概念,但在实际业务场景中,两者之间会有一些约束(不是所有的组合都是合法的)。 + +## 下一步是什么 + +以下是推荐下一步阅读的内容: + +* 学习 Koordinator 的[优先级](./priority)。 +* 学习 Koordinator 的[QoS](./qos.md)。 + diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/best-practices/anolis_plugsched.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/best-practices/anolis_plugsched.md new file mode 100644 index 000000000..185b2633a --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/best-practices/anolis_plugsched.md @@ -0,0 +1,37 @@ +--- +sidebar_position: 2 +--- + +# Anolis Plugsched + +为了提升CentOS 7.9操作系统内核在CPU资源维度的混部效果,龙蜥社区提供了一种插件化的解决方案,即利用 plugsched 调度器热升级技术提供一种 CPU 混部技术的调度器插件包。该插件可直接安装到 CentOS 7.9,不需要停机和业务迁移等工作。了解更多信息,请参阅[Blog](https://koordinator.sh/blog/anolis-CPU-Co-location) + +## Prerequisites + +- Kernel: 必须使用官方CentOS 7.9的内核。 +- version == 3.10.0 +- release >= 1160.81.1 + +## 使用 Plugsched + +### 安装插件 + + ``` + # rpm -ivh https://github.com/koordinator-sh/koordinator/releases/download/v1.1.1/scheduler-bvt-noise-clean-$(uname -r).rpm + ``` + +如果你更新内核版本,你可以使用如下命令安装新的插件。 + + ``` + # rpm -ivh https://github.com/koordinator-sh/koordinator/releases/download/v1.1.1/scheduler-bvt-noise-clean-$(uname -r).rpm --oldpackage + ``` + +安装完成后,你可以在cpu cgroup目录下看到 `cpu.bvt_warp_ns` ,其使用方法与Group Identity特性兼容。 + +### 移除插件 + +移除插件可以使用 `rpm -e` 命令,然后 `cpu.bvt_warp_ns` 将也不再存在。请确保卸载插件前没有任何任务还在使用 `cpu.bvt_warp_ns` 。 + +## 使用Koordinator的CPU QoS功能 + +请参阅对应的[用户文档](../user-manuals/cpu-qos.md)。 \ No newline at end of file diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/best-practices/colocation-of-spark-jobs.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/best-practices/colocation-of-spark-jobs.md new file mode 100644 index 000000000..ee27d1389 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/best-practices/colocation-of-spark-jobs.md @@ -0,0 +1,101 @@ +--- +sidebar_position: 1 +--- + +# Colocation of Spark Jobs +Apache Spark is an analysis engine for large-scale data processing, which is widely used in Big Data, SQL Analysis and Machine Learning scenarios. This tutorial provides a quick practice guide about running Spark jobs in colocation mode with other latency sensitive applications by Koordinator, which is helpful for improving cluster resource utilization. For more details about how to use, compose, and work with Koordinator colocation, please refer to the [Introduction](../) + +## Requirements +### Koordinator Components +Before submitting Spark jobs as colocate mode, you need to ensure all Koordinator components have already been successfully installed. Please follow the step in [Installation](../installation) guide. + +### Install Kubernetes Operator for Apache Spark +To simplify running of Spark jobs in Cluster, we import the Kubernetes Operator for Apache Spark in this practice, which uses Kubernetes custom resource for managing Spark applications. + +With the help of Helm [chart](https://github.com/koordinator-sh/koordinator/tree/main/examples/spark-operator-chart), Kubernetes Operator for Apache Spark can be easily installed using the command below. 
+``` +$ helm install koord-spark-operator ./spark-operator-chart/ --namespace spark-operator +``` + +Installing the chart will create a namespace `spark-operator` and if doesn't exist, besides, helm will create a spark-operator Deployment and set up RBAC role for it. After the installation, you should see the operator in running successfully by checking the status of helm release. +``` +$ helm status --namespace spark-operator koord-spark-operator +``` + +## Run Spark Applications with Koordinator +Due to the mechanism that Spark driver pod needs a Kubernetes service account to manage executor pods, the service account must be authorized with appropriate permissions. Run the following command to create namespace `spark-demo` and service account `spark` before submitting jobs. +``` +$ kubectl apply -f examples/spark-jobs/service-account.yaml +``` + +Next, run the following command to create Colocation Profile so that all pods submitted following in namespace `spark-demo` will run in colocation mode. See this [tutorial](../user-manuals/colocation-profile) to learn more about Colocation Profile. +``` +$ kubectl apply -f examples/spark-jobs/cluster-colocation-profile.yaml +``` + +Submit a Spark TC example job to namespace `spark-demo` with the command: +``` +$ kubectl apply -f examples/spark-jobs/spark-tc-complex.yaml +``` + +Then, check the status of Spark application by running the following command. +``` +$ kubectl get sparkapplication -n spark-demo spark-tc-complex +``` + +This will show similar content as following: +``` +NAME STATUS ATTEMPTS START FINISH AGE +spark-tc-complex RUNNING 1 2022-03-30T09:11:22Z 14s +``` +Now, all pods submitted to namespace `spark-demo` will be switched to colocation mode, check spark-driver pod as below for example. We can see the protocols like`koordinator.sh/qosClass: BE` and `kubernetes.io/batch-cpu` are successfully injected to pod by Colocation Profile. +``` +apiVersion: v1 +kind: Pod +metadata: + labels: + koordinator.sh/qosClass: BE + spark-role: driver + ... +spec: + containers: + - args: + - driver + - --properties-file + - /opt/spark/conf/spark.properties + - --class + - org.apache.spark.examples.SparkTC + - local:///opt/spark/examples/jars/spark-examples_2.12-3.2.1-tc1.2.jar + resources: + limits: + kubernetes.io/batch-cpu: "1000" + kubernetes.io/batch-memory: 3456Mi + requests: + kubernetes.io/batch-cpu: "1000" + kubernetes.io/batch-memory: 3456Mi + ... +``` + +## Evaluation +With the help of Koordinator, when pods resource usage is idle, resources already requested can be reallocated to other colocation pods by the overcommitment model, which can significantly improve the resource utilization of cluster. + +In our experiment environment, before the Spark job submitted, we can see the cluster allocatable resources run out while the actual resource usage is in low level. +``` +$ kubectl describe node + Allocated resources: + Resource Requests + cpu 7620m (95.25%) + +$ kubectl top node + NAME CPU(cores) CPU% + cn-hangzhou.your-node-1 1190m 14.8% + cn-hangzhou.your-node-2 1620m 20.25% +``` + +After submit the Spark job in colocation mode, those unused resources will be reallocated through `batch priority` to Spark pods, so that we can make the cluster a higher utilization level. 
+``` +$ kubectl top node +NAME CPU(cores) CPU% +cn-hangzhou.your-node-1 4077m 52% +cn-hangzhou.your-node-2 3830m 49% +``` \ No newline at end of file diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/best-practices/fine-grained-cpu-orchestration.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/best-practices/fine-grained-cpu-orchestration.md new file mode 100644 index 000000000..92851eb8c --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/best-practices/fine-grained-cpu-orchestration.md @@ -0,0 +1,259 @@ +# Coordinated sharing of CPU resources in Colocation Scenarios - Fine-grained CPU Orchestration + +## Introduction + +In a cloud-native environment, users often deploy different types of workloads in the same cluster, leveraging different peak effects of different services to achieve time-sharing multiplexing of resources and avoid resource waste. However, colocation of different types of workloads often leads to resource competition and mutual interference. The most typical scenario is the colocation of online and offline workloads. When more computing resources are occupied by offline workloads, the response time of online loads will be affected; when more computing resources are occupied by online workloads for a long time, the task completion time of offline workloads cannot be guaranteed. This phenomenon belongs to the Noisy Neighbor problem. + +Depending on the degree of colocation and resource types, there are many different ways to solve this problem. Quota management can limit the resource usage of loads from the entire cluster dimension, and Koordinator provides multi-level elastic quota management functions in this regard. From the single-node level, CPU, memory, disk IO, and network resources may be shared by different loads. Koordinator has provided some resource isolation and guarantee capabilities on CPU and memory, and related capabilities on disk IO and network resources are under construction. + +This article mainly introduces how Koordinator helps loads (online and online, online and offline) share CPU resources collaboratively when different types of workloads are colocated on the same node. + +## Problem Description + +The essence of CPU resource Noisy Neighbor is that different workloads share CPU resources without coordination. +1. The default resource model of Kubernetes uses cgroup (cfs quota) to limit the access of different loads to CPU resources in terms of CPU time usage. In this case, some workloads may be switched to CPU cores by the operating system scheduler. Since different CPU cores have different memory access time to different physical locations, switching cpu cores will result in longer memory access time, thus affecting load performance, thereby affecting load performance. +2. In NUMA architecture, SMT threads (logical cores) share execution units and L2 cache of physical cores. +When there are multiple workloads on the same physical core, resource contention will happen between different workloads, resulting in load performance degradation. + +Kubernetes provides topology manager and CPU manager on node level to solve the above problems. However, this feature will only attempt to take effect after the Pod has been scheduled on the machine. This may lead to the situation where Pods are scheduled to nodes with sufficient CPU resources but topology requirements are not met. 
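+
+For reference, the node-level mechanism mentioned above is switched on through the kubelet configuration. The sketch below shows a typical static CPU manager plus single-numa-node topology manager setup (the values are illustrative and this is not a Koordinator component); because these policies are only evaluated at admission time on the chosen node, a Pod that passes central scheduling can still be rejected there with a `Topology Affinity` error.
+
+```yaml
+# Illustrative KubeletConfiguration snippet for the node-level approach described above.
+apiVersion: kubelet.config.k8s.io/v1beta1
+kind: KubeletConfiguration
+cpuManagerPolicy: static                 # pin Guaranteed pods that request integer CPUs
+topologyManagerPolicy: single-numa-node  # align CPU/device allocations at node admission time
+topologyManagerScope: container
+```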
+ +## Solutions + +### Application-Oriented CPU Orchestration QoS Semantics + +In response to the above problems and deficiencies, Koordinator designed an application-oriented QoS semantics and CPU orchestration protocol, as shown in the figure below. + +![img](/img/qos-cpu-orchestration.png) + +LS (Latency Sensitive) is applied to typical microservice loads, and Koordinator isolates it from other latency-sensitive loads to ensure its performance. LSR (Latency Sensitive Reserved) is similar to Kubernetes' Guaranteed. On the basis of LS, it adds the semantics that applications require reserved binding cores. LSE (Latency Sensitive Exclusive) is common in applications that are particularly sensitive to CPU, such as middleware. In addition to satisfying its semantics similar to LSR's requirement to bind cores, Koordinator also ensures that the allocated CPU is not shared with any other load. + +Also, to improve resource utilization, BE workloads can share CPU with LSR and LS. To ensure that latency-sensitive applications shared with BE are not disturbed by it, Koordinator provides strategies such as interference detection and BE suppression. The focus of this article is not here, readers can pay attention to follow-up articles. + +### Rich CPU scheduling strategies + +For LSE applications, when the machine is a hyper-threaded architecture, only logical cores can be guaranteed to be exclusive to the load. In this way, when there are other loads on the same physical core, application performance will still be disturbed. +To this end, Koordinator supports users to configure rich CPU scheduling policies on pod annotation to improve performance. + +CPU orchestration policies are divided into CPU-binding policies and CPU-exclusive policies. The CPU binding strategy determines the distribution of logical cores assigned to the application among physical cores, which can be spread or stacked among physical cores. Stacking (FullPCPU) refers to allocating complete physical cores to applications, which can effectively alleviate the Noisy Neighbor problem. SpreadByPCPU is mainly used in some delay-sensitive applications with different peak and valley characteristics, allowing the application to fully use the CPU at a specific time. The CPU exclusive policy determines the exclusive level of logical cores assigned to the application, and it can try to avoid physical cores or NUMANodes that have been applied for with the exclusive policy. + +### Enhanced CPU Scheduling Capabilities + +Koordinator supports the configuration of NUMA allocation strategies to determine how to select satisfactory NUMA nodes during scheduling. MostAllocated indicates allocation from the NUMA node with the least available resources, which can reduce fragmentation as much as possible and leave more allocation space for subsequent loads. However, this approach may cause the performance of parallel code that relies on Barriers to suffer. DistributeEvenly means that evenly distributing CPUs on NUMA nodes can improve the performance of the above parallel code. LeastAllocated indicates allocation from the NUMA node with the most available resources. + +In addition, Koordinator's CPU allocation logic is completed in the central scheduler. In this way, there will be a global perspective, avoiding the dilemma of single-node solution, where CPU resources may be sufficient but topology requirements are not met. 
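+
+To make the protocol concrete, the minimal Pod sketch below declares LSR QoS together with a CPU binding policy and a CPU exclusive policy directly on the Pod (the Pod name and image are placeholders); the Best Practices section that follows injects the same annotations through a ClusterColocationProfile instead.
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: cpu-orchestration-example    # placeholder name
+  labels:
+    koordinator.sh/qosClass: LSR     # LSE / LS / BE are declared the same way
+  annotations:
+    # spread logical cores across physical cores and keep those physical cores exclusive
+    scheduling.koordinator.sh/resource-spec: '{"preferredCPUBindPolicy":"SpreadByPCPUs","preferredCPUExclusivePolicy":"PCPULevel"}'
+spec:
+  schedulerName: koord-scheduler
+  containers:
+    - name: app
+      image: nginx:1.18              # placeholder image
+      resources:
+        requests:
+          cpu: '4'
+          memory: 8Gi
+        limits:
+          cpu: '4'
+          memory: 8Gi
+```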
+ +## Best Practices +As can be seen from the above, Koordinator's fine-grained CPU orchestration capability can significantly improve the performance of CPU-sensitive workloads in multi-application colocation scenarios. In order to allow readers to use Koordinator’s fine-grained CPU scheduling capabilities more clearly and intuitively, this article deploys online applications to clusters in different ways, and observes the latency of services in stress testing to judge the effect of CPU scheduling capabilities. + +In this article, multiple online applications will be deployed on the same machine and pressure tested for 10 minutes to fully simulate the CPU core switching scenarios that may occur in production practice. For the colocation of online and offline applications, Koordinator provides strategies such as interference detection and BE suppression. The focus of this article is not here, and readers can pay attention to the practice in subsequent articles. + +|Group Number|Deployment Mode|Description|Scenarios| +|-|-|-|-| +|A|10 online applications are deployed on the nodes, and each node applies for 4 CPUs, all using kubernetes guaranteed QoS|Koordinator does not provide fine-grained CPU orchestration capabilities for applications|Due to CPU core switching, applications share logical cores, application performance will be affected, and it is not recommended to use| +|B|Deploy 10 online applications on the nodes, each application node has 4 CPUs, all adopt LSE QoS, CPU binding strategy adopts physical core binpacking(FullPCPUs)|Koordinator provides CPU core binding capability for LSE Pod and online applications will not share physical cores|Particularly sensitive online scenarios, application cannot accept CPU sharing at the physical core level| +|C|Deploy 10 online applications on the node, each application node has 4 CPUs, all adopt LSR QoS, CPU binding strategy adopts physical core split (SpreadByPCPUs), use CPU exclusively by physical cpu level|Koordinator provides CPU binding core capability for LSR Pod and online application logical core can use more physical core capacity|It is often used to share physical cores with offline Pods and implement time-sharing multiplexing at the physical core level. This article does not focus on the mixed deployment of online and offline applications, so it only tests the overuse of online applications| + +This experiment uses the following performance indicators to evaluate the performance of Nginx applications under different deployment methods: + +- RT (Response Time) quantile value: RT is a performance indicator that online applications usually focus on. The lower the RT, the better the online service performance. The RT indicator is obtained by collecting the information printed after the wrk pressure tests. In the experiment, it reflects the time it takes for the Nginx application to respond to the wrk request. For example, RT-p50 indicates the maximum time (median) it takes for Nginx to respond to the first 50% of wrk requests, and RT-p90 indicates the maximum time it takes for Nginx to respond to the first 90% of wrk requests. +- RPS (Request Per Second): RPS is the number of requests served by an online application per second. The more RPS a service bears, the better the performance of the online service. 
+ + +The experimental results are as follows: + +|Performance Indicators/Deployment Mode| A(colocation of two online applications, Guaranteed)|B(colocation of two online applications, LSE、FullPCPU)|C(colocation of two online applications, LSR、SpreadByPCPU、PCPULevel| +|-|-|-|-| +|RPS| 114778.29|114648.19|115268.50| +|RT-avg (ms)|3.46 ms|3.33 ms|3.25 ms| +|RT-p90 (ms)|5.27 ms|5.11 ms|5.06 ms| +|RT-p99 (ms)|15.22 ms|12.61 ms|12.14 ms| + +- Comparing B and A, it can be found that after adopting LSE QoS to bind the core, the service response time P99 is significantly reduced, and the long tail phenomenon is well alleviated +- Comparing C and B, it can be found that after using LSR QoS to bind cores and allowing logical cores to occupy more physical core resources, more requests can be tolerated with better service response time + +In summary, in the scenario where online services are deployed on the same machine, using Koordinator to refine the CPU arrangement can effectively suppress the Noisy Neighbor problem and reduce the performance degradation caused by CPU core switching. + +### Environemnt + +First, prepare a Kubernetes cluster and install Koordinator. This article chooses two nodes of a Kubernetes cluster to do the experiment, one of the nodes is used as a test machine, which will run the Nginx online server; the other node is used as a pressure test machine, which will run the client's wrk, request the Nginx online server, and make pressure test requests . + +### Online application deployment + +1. Inject fine-grained CPU orchestration protocols into applications using ColocationProfile + + Group B fine-grained CPU orchestration protocol + + ```yaml + apiVersion: config.koordinator.sh/v1alpha1 + kind: ClusterColocationProfile + metadata: + name: colocation-profile-example + spec: + selector: + matchLabels: + app: nginx + # 采用 LSE QoS + qosClass: LSE + annotations: + # 采用物理核间堆叠 + scheduling.koordinator.sh/resource-spec: '{"preferredCPUBindPolicy":"FullPCPUs"}' + priorityClassName: koord-prod + ``` + + Group C fine-grained CPU orchestration protocol + + ```yaml + apiVersion: config.koordinator.sh/v1alpha1 + kind: ClusterColocationProfile + metadata: + name: colocation-profile-example + spec: + selector: + matchLabels: + app: nginx + # 采用 LSR QoS + qosClass: LSR + annotations: + # 采用物理核间打散且独占物理核 + scheduling.koordinator.sh/resource-spec: '{"preferredCPUBindPolicy":"SpreadByPCPUs", "preferredCPUExclusivePolicy":"PCPULevel"}' + priorityClassName: koord-prod + ``` + +2. 
This article uses Nginx server as Online Service , Pod YAML is as follows: + + ```yaml + --- + # nginx应用配置 + apiVersion: v1 + data: + config: |- + user nginx; + worker_processes 4; # Nginx的Worker个数,影响Nginx Server的并发。 + + events { + worker_connections 1024; # 默认值为1024。 + } + + http { + server { + listen 8000; + + gzip off; + gzip_min_length 32; + gzip_http_version 1.0; + gzip_comp_level 3; + gzip_types *; + } + } + + #daemon off; + kind: ConfigMap + metadata: + name: nginx-conf-0 + --- + # Nginx实例,作为在线类型服务应用。 + apiVersion: v1 + kind: Pod + metadata: + labels: + app: nginx + name: nginx-0 + namespace: default + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - "${node_name}" + schedulerName: koord-scheduler + priorityClassName: koord-prod + containers: + - image: 'koordinatorsh/nginx:v1.18-koord-exmaple' + imagePullPolicy: IfNotPresent + name: nginx + ports: + - containerPort: 8000 + hostPort: 8000 # 压测请求访问的端口。 + protocol: TCP + resources: + limits: + cpu: '4' + memory: 8Gi + requests: + cpu: '4' + memory: 8Gi + volumeMounts: + - mountPath: /apps/nginx/conf + name: config + hostNetwork: true + restartPolicy: Never + volumes: + - configMap: + items: + - key: config + path: nginx.conf + name: nginx-conf-0 + name: config + ``` + +3. Execute the following command to deploy the Nginx application. + + ```bash + kubectl apply -f nginx.yaml + ``` + +4. Execute the following command to view the Pod status of the Nginx application. + + ```bash + kubectl get pod -l app=nginx -o wide + ``` + + You can see output similar to the following, indicating that the Nginx application has been running normally on the test machine. + + ``` + NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES + nginx-0 1/1 Running 0 2m46s 10.0.0.246 test-machine-name + + ``` + +### Load Test + +1. On the testing machine, execute the following command to deploy the stress testing tool wrk. + + ```bash + wget -O wrk-4.2.0.tar.gz https://github.com/wg/wrk/archive/refs/tags/4.2.0.tar.gz && tar -xvf wrk-4.2.0.tar.gz + cd wrk-4.2.0 && make && chmod +x ./wrk + ``` + +2. On the testing machine, execute the following command to deploy the load testing tool wrk + + ```bash + # node_ip填写测试机的IP地址,用于wrk向测试机发起压测;8000是Nginx暴露到测试机的端口。 + taskset -c 32-45 ./wrk -t120 -c400 -d600s --latency http://${node_ip}:8000/ + ``` + +3. After waiting for wrk to finish running, obtain the pressure test results of wrk. The output format of wrk is as follows. Repeat the test several times to obtain relatively stable results. + + ``` + Running 10m test @ http://192.168.0.186:8000/ + 120 threads and 400 connections + Thread Stats Avg Stdev Max +/- Stdev + Latency 3.29ms 2.49ms 352.52ms 91.07% + Req/Sec 0.96k 321.04 3.28k 62.00% + Latency Distribution + 50% 2.60ms + 75% 3.94ms + 90% 5.55ms + 99% 12.40ms + 68800242 requests in 10.00m, 54.46GB read + Requests/sec: 114648.19 + Transfer/sec: 92.93MB + ``` + +## Conclusion + +In a Kubernetes cluster, there may be competition for resources such as CPU and memory among different business loads, which affects the performance and stability of the business. In the face of the Noisy Neighbor phenomenon, users can use Koordinator to configure more refined CPU scheduling policies for applications, so that different applications can share CPU resources collaboratively. 
We have shown through experiments that Koordinator's fine-grained CPU scheduling capability can effectively suppress the competition for CPU resources and improve application performance. \ No newline at end of file diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/descheduler-framework.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/descheduler-framework.md new file mode 100644 index 000000000..e054a557a --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/descheduler-framework.md @@ -0,0 +1,84 @@ +# Descheduler Framework + +## Summary + +This proposal is based on the K8s community's [descheduler](https://github.com/kubernetes-sigs/descheduler) to design and implement the descheduler framework required by the koordinator. + +## Motivation + +The existing [descheduler](https://github.com/kubernetes-sigs/descheduler) in the community can solve some problems, but we think that there are still many aspects of the descheduler that can be improved, for example, it only supports the mode of periodic execution, and does not support the event-triggered mode. It is not possible to extend and configure custom rescheduling strategies without invading the existing code of descheduler like kube-scheduler; it also does not support implementing custom evictor. + +We also noticed that the K8s descheduler community also found these problems and proposed corresponding solutions such as [#753 Descheduler framework Proposal](https://github.com/kubernetes-sigs/descheduler/issues/753) and [PoC #781](https://github.com/kubernetes-sigs/descheduler/pull/781). The K8s descheduler community tries to implement a descheduler framework similar to the k8s scheduling framework. This coincides with our thinking. + +On the whole, these solutions solved most of our problems, but we also noticed that the related implementations were not merged into the main branch. But we review these implementations and discussions, and we believe this is the right direction. Considering that Koordiantor has clear milestones for descheduler-related features, we will implement Koordinator's own descheduler independently of the upstream community. We try to use some of the designs in the [#753 PR](https://github.com/kubernetes-sigs/descheduler/issues/753) proposed by the community and we will follow the Koordinator's compatibility principle with K8s to maintain compatibility with the upstream community descheduler when implementing. Such as independent implementation can also drive the evolution of the upstream community's work on the descheduler framework. And when the upstream community has new changes or switches to the architecture that Koordinator deems appropriate, Koordinator will follow up promptly and actively. + +### Goals + +1. Implement Koordinator Descheduler following part of the design in [#753](https://github.com/kubernetes-sigs/descheduler/issues/753) proposed by the community + +### Non-Goals/Future Work + +1. Break any existing use cases of the Descheduler. + +## Proposal + +### Implementation Details/Notes/Constraints + +#### Descheduler profile + +The current descheduler configuration is too simple to support disabling or enabling plugins or supporting custom plugin configurations. The [PR #587](https://github.com/kubernetes-sigs/descheduler/pull/587) introducing descheduler profiles with v1alpha2 api version. We will use this proposal as Koordiantor Descheduler's configuration API. 
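+
+For illustration only, a profile-style configuration in this spirit might look like the sketch below (the API group, kind, plugin names and arguments are assumptions made for the example rather than the authoritative schema); the points that follow summarize what the profile API is expected to support.
+
+```yaml
+# Illustrative sketch only: field and plugin names are assumptions, not the final API.
+apiVersion: descheduler/v1alpha2
+kind: DeschedulerConfiguration
+deschedulingInterval: 120s
+profiles:
+  - name: koord-descheduler
+    plugins:
+      deschedule:
+        disabled:
+          - name: "*"
+      balance:
+        enabled:
+          - name: LowNodeLoad          # hypothetical load-aware balance plugin
+      evict:
+        enabled:
+          - name: MigrationController  # hypothetical evictor backed by PodMigrationJob
+    pluginConfig:
+      - name: LowNodeLoad
+        args:
+          # plugin-specific arguments (e.g. utilization thresholds) would go here
+          highThresholds:
+            cpu: 75
+            memory: 80
+```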
+ +- The descheduler profile API supports user specify which extension points are enabled/disabled, alongside specifying plugin configuration. Including ability to configure multiple descheduling profiles. +- The descheduling framework configuration can be converted into an internal representation. +- To reduce need to specify value for every possible configuration, also defaulting serves as a recommended/opinionated settings for the plugins. + +#### Abstract PodEvictor interface + +Currently, descheduler has split `Pod Evictor` and `Evictor Filter`. Users can inject `Evictor Filter` on demand, and the plug-in calls `Evictor Filter` when selecting abnormal Pods to select Pods that meet the requirements and calls `Pod Evictor` to initiate eviction. At present, `Pod Evictor` has not been abstracted as an interface. We adopt the solution in [PoC #781](https://github.com/kubernetes-sigs/descheduler/pull/781) to abstract an `Evictor interface`. And refer to [PR #885](https://github.com/kubernetes-sigs/descheduler/pull/885) to add an `EvictOptions` paramters. We can implement custom Evictor based on [PodMigrationJob](https://github.com/koordinator-sh/koordinator/blob/main/docs/proposals/scheduling/20220701-pod-migration-job.md). + +The `Evictor` interface defined as follows: + +```go +type EvictOptons struct { + // PluginName represents the initiator of the eviction operation + PluginName string + // Reason allows for passing details about the specific eviction for logging. + Reason string + // DeleteOptions holds the arguments used to delete + DeleteOptions *metav1.DeleteOptions +} + +// Plugin is the parent type for all the descheduling framework plugins. +type Plugin interface { + Name() string +} + +type Evictor interface { + Plugin + // Filter checks if a pod can be evicted + Filter(pod *corev1.Pod) bool + // Evict evicts a pod (no pre-check performed) + Evict(ctx context.Context, pod *corev1.Pod, evictOptions EvictOptions) bool +} +``` + +#### Plug-in descheduler strategy + +The current descheduler has some strategies. In [PoC #781](https://github.com/kubernetes-sigs/descheduler/pull/781), it is converted into `Plugin` and executed periodically. In this `periodic execution mode`, it is appropriate to abstract the policy for Pod and Node dimensions as `DeschedulePlugin` or `BalancePlugin`. The load hotspot descheduling capability that we will implement later can also implement the BalancePlugin interface. + +The `DeschedulePlugin` and `BalancePlugin` interfaces defined as follows: + +```go +type DeschedulePlugin interface { + Plugin + Deschedule(ctx context.Context, nodes []*corev1.Node) *Status +} + +type BalancePlugin interface { + Plugin + Balance(ctx context.Context, nodes []*corev1.Node) *Status +} +``` + +We also need to support the `event-triggered mode`, which means that descheduling is performed in the form of a Controller. +In some scenarios, CRD-oriented descheduling needs to be implemented. For example, different descheduling configurations are provided according to the workload. When some abnormality is detected in the workload, descheduling will be triggered. We can think of Controller as a special form of Plugin. When the descheduler is initialized, an instance is constructed through the plugin factory function like a normal Plugin, and then a similar Run method is called to start execution. 
\ No newline at end of file diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/enhanced-scheduler-extension.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/enhanced-scheduler-extension.md new file mode 100644 index 000000000..8c61c719d --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/enhanced-scheduler-extension.md @@ -0,0 +1,232 @@ +# Enhanced Scheduler Extension + +## Summary + +This proposal describes how to extend the kubernetes scheduling framework without modify upstream codes to support the scheduling features that Koordinator needs to develop. + +## Motivation + +Although Kubernetes Scheduler provides the scheduling framework to help developer to extend scheduling features. However, it cannot support the features that Koordinator needs to develop, such as Reservation, problem diagnosis and analysis, etc. + +### Goals + +1. Provides scheduling extension point hook mechanism +1. Provides scheduling plugins expose state mechanism to help diagnose analysis problems + +### Non-Goals/Future Work + + +## Proposal + +### User stories + +#### Story 1 + +Koordiantor supports users to use `Reservation` CRD to reserve resources. We expect Reservation CRD objects to be scheduled like Pods. In this way, the native scheduling capabilities of Kubernetes and other extended scheduling capabilities can be reused. This requires a mechanism to disguise the Reservation CRD object as a Pod, and to extend some scheduling framework extension points to support updating the Reservation Status. + +#### Story 2 + +Koordinator provides some scheduling plugins, such as Fine-grained CPU Scheduling, Device Share Scheduling, Coscheduling, ElasticQuota, etc. These plugins are brand new, and the supported scenarios are relatively rich, and the internal logic and state of the plugins are also a bit complicated. When we may encounter some problems in the production environment and need to be diagnosed and analyzed, we need to confirm the cause of the problem based on the internal status of the plugin. But currently the kubernetes scheduling framework does not provide a mechanism to export the internal state of the plugin. + +#### Story 3 + +The scheduler provides many plugins, and most plugins implement Scoring Extension Point. How to configure the weights of these plugins needs to be decided in combination with specific problems. When the optimal node is selected according to the scoring results, the results may not meet expectations. At this point we need to be able to trace or debug these scoring results in some way. But there is currently no good way. + +### Design Details + +#### Enhancement Kubernetes Scheduling Framework principles + +At present, the kube-scheduler provided by Kubernetes can be divided into several parts. The outermost layer is `k8s.io/kubernetes/cmd/kube-scheduler`, which is the entrance of kube-scheduler; `k8s.io/kubernetes/pkg/scheduler` is responsible for integrating the framework And execute scheduling workflow, including initializing framework and plugins, scheduling Pod, etc. The core module is `k8s.io/kubernetes/pkg/scheduler/framwork`, which is the **Kubernetes Scheduling Framework**. + +Each layer provides some interfaces or methods to support developers to extend some capabilities, and the evolution speed of each layer is also different. 
Generally speaking, the evolution speed of the more core modules should be slower, and the evolution of core modules tends to extend rather than modify the existing interface or extension mechanism, otherwise it will bring very large cost and reliability to external dependencies. question. But each layer does not support implementing some features for some reason. But as far as the problems Koordinator is currently experiencing, there are still some workarounds. However, some principles need to be followed to reduce future conflicts with the evolution of the upstream Kubernetes community. + +1. DO NOT modify the Kubernetes Scheduling Framework. The scheduling framework is the core module of kube-scheduler and is still evolving. In order to avoid conflict with the upstream community between Koordinator's enhanced capabilities. +1. DO NOT modify the `k8s.io/kubernetes/pkg/scheduler` but can implements supported interfaces or high-order functions, such as `ScheduleAlgorithm`, `NextPod`, `Error` and `Profiles`. The `Profiles` contains an instance of the Framework interface corresponding to each KubeSchedulerProfile. We can implement the Framework and replace the instances in Profiles to get the opportunity to participate in the scheduling process to do something. +1. Extend `k8s.io/kubernetes/cmd/kube-scheduler` as simply as possible. + +#### Custom Extension Overview + +![image](/img/scheduler-extension.jpg) + +#### ExtendedHandle + +ExtendedHandle extends the k8s scheduling framework `Handle` interface to facilitate plugins to access Koordinator's resources and states. +Before constructs the `k8s.io/kubernetes/pkg/scheduler.Scheduler` object, we should build an ExtendedHandle object and pass the object to each custom plugins. + +```go +type ExtendedHandle interface { + framework.Handle + KoordinatorClientSet() koordinatorclientset.Interface + KoordinatorSharedInformerFactory() koordinatorinformers.SharedInformerFactory + SnapshotSharedLister() framework.SharedLister +} +``` + +#### Intercept plugin initialization process + +In order to pass the `ExtendedHandle` object to each custom plugins, we should intercept the plugin initialization process. +And we expect that any customized plugins can be directly and seamlessly integrated into the koordinator scheduler, so the `PluginFactory` of the plugin will not be changed. Therefore, we can modify the prototype of `k8s.io/kubernetes/cmd/kube-scheduler/app.Option` and the implementation of `k8s.io/kubernetes/cmd/kube-scheduler/app.WithPlugin` as the follows to get the opportunity to intercept the plugin initialization process. + +When the custom plugin is registered to the out-of registry using `WithPlugin`, it will use `frameworkext.PluginFactoryProxy` to wrap the plugin's original `PluginFactory`. We finally complete the interception of the plugin initialization process in `frameworkext.PluginFactoryProxy`. + +Of course, we will not modify `k8s.io/kubernetes/cmd/kube-scheduler` directly. Considering that the logic of `k8s.io/kubernetes/cmd/kube-scheduler` itself is not complicated, it will basically not bring us additional maintenance costs, so we will copy the relevant code to Koordinator for separate maintenance. + + +```go + +// Option configures a framework.Registry. +type Option func(frameworkext.ExtendedHandle, runtime.Registry) error + +// WithPlugin creates an Option based on plugin name and factory. 
Please don't remove this function: it is used to register out-of-tree plugins, +// hence there are no references to it from the kubernetes scheduler code base. +func WithPlugin(name string, factory runtime.PluginFactory) Option { + return func(handle frameworkext.ExtendedHandle, registry runtime.Registry) error { + return registry.Register(name, frameworkext.PluginFactoryProxy(handle, factory)) + } +} + +// frameworkext.PluginFactoryProxy +func PluginFactoryProxy(extendHandle ExtendedHandle, factoryFn frameworkruntime.PluginFactory) frameworkruntime.PluginFactory { + return func(args runtime.Object, handle framework.Handle) (framework.Plugin, error) { + impl := extendHandle.(*frameworkExtendedHandleImpl) + impl.once.Do(func() { + impl.Handle = handle + }) + return factoryFn(args, extendHandle) + } +} +``` + +#### Expose the internal state of plugins + +We will define a new extension interface to help plugins expose their internal state through Restful APIs, and provide some built-in Restful APIs to query which APIs are exposed by the current scheduler and some commonly used internal data, such as NodeInfo, etc. + +The new extension interface is named `APIServiceProvider`. Plugins can implement this interface to register the APIs they want to expose as needed. When the plugin is initialized, `frameworkext.PluginFactoryProxy` will check whether the newly constructed plugin implements `APIServiceProvider`, and if so, it will call the `RegisterEndpoints` method of the interface to register the API. The Restful APIs exposed by these plugins will be bound to the URL path `/apis/v1/plugins/` and will be prefixed with the name of each plugin. For example, the API `/availableCPUs/:nodeName` exposed by the plugin `NodeNUMAResource` will be converted to `/apis/v1/plugins/NodeNUMAResource/availableCPUs/:nodeName`. + + +```go +type APIServiceProvider interface { + RegisterEndpoints(group *gin.RouterGroup) +} + +type ErrorMessage struct { + Message string `json:"message,omitempty"` +} + +func ResponseErrorMessage(c *gin.Context, statusCode int, format string, args ...interface{}) { + var e ErrorMessage + e.Message = fmt.Sprintf(format, args...) + c.JSON(statusCode, e) +} +``` + +Users can use the built-in API `/apis/v1/__services__` to query how many Restful APIs are provided by the current scheduler. The response is as follows: + +```json +{ + "GET": [ + "/apis/v1/__services__", + "/apis/v1/nodes/:nodeName", + "/apis/v1/plugins/Coscheduling/gang/:namespace/:name", + "/apis/v1/plugins/Coscheduling/gangs", + "/apis/v1/plugins/NodeNUMAResource/availableCPUs/:nodeName", + "/apis/v1/plugins/NodeNUMAResource/cpuTopologyOptions/:nodeName" + ] +} +``` + +The Koordinator scheduler also provides `/apis/v1/nodes/:nodeName` to expose the internal `NodeInfo` to developers. + + +#### Support plugin to create Controllers + +Similar to Coscheduling/ElasticQuota Scheduling, these scheduling plugins have a matching Controller to synchronize the status of the related CRD. The most common way is to deploy these controllers independently of the scheduler. This method not only brings additional maintenance and resource costs, but also, when more of the scheduling plugin's state needs to be synchronized to the CRD Status, requires the logic in the Controller and the logic in the plugin to be coordinated more closely. The best way is for the Controller and the scheduling plugin to run in the same process. + +We can define a new interface called `ControllerProvider`.
When the plugin is initialized, `frameworkext.PluginFactoryProxy` will check whether the newly constructed plugin implements `ControllerProvider`, and if so, it will call the `NewControllers` method of the interface to get the instances of Controllers, and save these instances in the `ExtendedHandle`. When the scheduler gets the leader role, it can trigger the `ExtendedHandle` to start these controllers. + +```go +type ControllerProvider interface { + NewControllers() ([]Controller, error) +} + +type Controller interface { + Start() + Name() string +} +``` + + +#### Debug Scoring Result + +If we want to support debug scoring results, the easiest way is to directly modify `Framework.RunScorePlugins` and print the results after scoring. But this goes against the extend principles we laid out earlier. But we can think differently. When `scheduler.Scheduler` executes `scheduleOne`, it obtains an instance of the `framework.Framework` interface from `Profiles` and calls the method `RunScorePlugins`. At the same time, considering that we have maintained the initialization code of scheduler separately, then we can customize the implementation of the `framework.Framework` interface, implement the method `RunScorePlugins` and take over the `Profiles` in `scheduler.Scheduler`. In this way, we can first call the `RunScorePlugins` method of the original `framework.Framework` interface instance in the custom implemented `RunScorePlugins`, and then print the result. + +For the processing of the results, we can simply print it to the log in markdown format. When needed, enable Scoring Result debugging capability through the HTTP interface `/debug/flags/s` like `/debug/flags/v`. The developers also enable the capability via flags `--debug-scores`. + +```bash +# print top 100 score results. 
+$ curl -X POST schedulerIP:10251/debug/flags/s --data '100' +successfully set debugTopNScores to 100 +``` + +The following are the specific scoring results: + + +``` +| # | Pod | Node | Score | ImageLocality | InterPodAffinity | LoadAwareScheduling | NodeAffinity | NodeNUMAResource | NodeResourcesBalancedAllocation | NodeResourcesFit | PodTopologySpread | Reservation | TaintToleration | +| --- | --- | --- | ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| +| 0 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.51 | 577 | 0 | 0 | 87 | 0 | 0 | 96 | 94 | 200 | 0 | 100 | +| 1 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.50 | 574 | 0 | 0 | 85 | 0 | 0 | 96 | 93 | 200 | 0 | 100 | +| 2 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.19 | 541 | 0 | 0 | 55 | 0 | 0 | 95 | 91 | 200 | 0 | 100 | +| 3 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.18 | 487 | 0 | 0 | 15 | 0 | 0 | 90 | 82 | 200 | 0 | 100 | +``` + +| # | Pod | Node | Score | ImageLocality | InterPodAffinity | LoadAwareScheduling | NodeAffinity | NodeNUMAResource | NodeResourcesBalancedAllocation | NodeResourcesFit | PodTopologySpread | Reservation | TaintToleration | +| --- | --- | --- | ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| +| 0 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.51 | 577 | 0 | 0 | 87 | 0 | 0 | 96 | 94 | 200 | 0 | 100 | +| 1 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.50 | 574 | 0 | 0 | 85 | 0 | 0 | 96 | 93 | 200 | 0 | 100 | +| 2 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.19 | 541 | 0 | 0 | 55 | 0 | 0 | 95 | 91 | 200 | 0 | 100 | +| 3 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.18 | 487 | 0 | 0 | 15 | 0 | 0 | 90 | 82 | 200 | 0 | 100 | + + +#### Custom Hook Extend Points to Support Reservation Scheduling + +If we want to schedule the Reservation CRD object in the form of Pod, we need to solve several problems: + +1. Before calling `PreFilter`, check whether the Pod has a matching Reservation. If there is a matching Reservation, and if the Pod is set with `Pod Affinity/AntiAffinity` or `TopologySpreadConstraints`, we need to modify the Pod to remove these fields. The reason is that when the Reservation CRD object is created, the user generally sets these fields, and expects to find suitable nodes to reserve resources according to these scheduling constraints. Therefore, if the Pod is scheduled with the same fields, it will cause the scheduling to fail. To do this, it cannot be achieved by implementing the `PreFilter` extension point, because the scheduler has already obtained the appropriate Pod to start executing when calling `PreFilter`, and we have lost the opportunity to modify the Pod to affect other plugins. +1. In the `Filter` phase, we also need to update the NodeInfo. If there is a Reservation CRD object on NodeInfo, and the current Pod matches the Reservation CRD object, then the resources applied for by the Reservation CRD object should be returned to NodeInfo. Only in this way can it pass the resource check of the scheduler, including the network port check. + +To solve these problems, we define the `Hook` interface. The plugin can be implemented on demand, and the Pod or NodeInfo can be modified when the PreFilter/Filter is executed. Similar to the custom implementation method `RunScorePlugins` mentioned above, we can customize the implementation methods `RunPreFilterPlugins` and `RunFilterPluginsWithNominatedPods`. 
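+For illustration only, a minimal sketch of how such a customized `RunPreFilterPlugins` could chain hooks before delegating to the original `framework.Framework` instance (the helper name `runPreFilterHooks` and the way the hook list is obtained are assumptions for this example, not part of the proposal; the hook interfaces themselves are defined below):
+
+```go
+// runPreFilterHooks applies each registered PreFilterPhaseHook in order.
+// The (possibly) modified Pod returned by one hook becomes the input of the
+// next hook, and the final Pod is what the original RunPreFilterPlugins sees.
+func runPreFilterHooks(handle ExtendedHandle, hooks []PreFilterPhaseHook, state *framework.CycleState, pod *corev1.Pod) *corev1.Pod {
+	for _, h := range hooks {
+		if modified, changed := h.PreFilterHook(handle, state, pod); changed && modified != nil {
+			pod = modified
+		}
+	}
+	return pod
+}
+```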
Before executing the real extension point logic, first execute the `Hook` interface and modify the Pod and NodeInfo. + +If necessary, you can modify the Pod or Node before executing the Score Extension Point by implementing ScorePhaseHook. + +Considering that there may be multiple different Hooks to modify the Pod or NodeInfo requirements, when the Hook is called, the Hook will be called cyclically, and the modification result of the previous Hook and the input of the next Hook will continue to be executed. + +Here are some additional explanations for the scenarios in which these new extension points should be used. If you can complete the scheduling function through the extension points such as Filter/Score provided by the K8s Scheduling Framework without modifying the incoming NodeInfo/Pod and other objects, you do not need to use these new extension points. + +```go +type SchedulingPhaseHook interface { + Name() string +} + +type PreFilterPhaseHook interface { + SchedulingPhaseHook + PreFilterHook(handle ExtendedHandle, state *framework.CycleState, pod *corev1.Pod) (*corev1.Pod, bool) +} + +type FilterPhaseHook interface { + SchedulingPhaseHook + FilterHook(handle ExtendedHandle, cycleState *framework.CycleState, pod *corev1.Pod, nodeInfo *framework.NodeInfo) (*corev1.Pod, *framework.NodeInfo, bool) +} + +type ScorePhaseHook interface { + SchedulingPhaseHook + ScoreHook(handle ExtendedHandle, cycleState *framework.CycleState, pod *corev1.Pod, nodes []*corev1.Node) (*corev1.Pod, []*corev1.Node, bool) +} + +``` + +## Alternatives + +### Use Filter instead of Filter Hook + +We can change the order of Filter plugins to support Reservation Scheduling to update NodeInfo earlier, which can replace Filter Hook. Subsequent implementations can be implemented as an optimization. diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/fine-grained-cpu-orchestration.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/fine-grained-cpu-orchestration.md new file mode 100644 index 000000000..929cba386 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/fine-grained-cpu-orchestration.md @@ -0,0 +1,451 @@ +# 精细化 CPU 编排 + +## 摘要 + +该提案详细定义了 Koordinator QoS 的细粒度 CPU 编排,以及如何兼容 K8s 现有的设计原则和实现, 描述了 koordlet、koord-runtime-proxy 和 koord-scheduler 需要增强的功能。 + +## 动机 + +越来越多的系统利用 CPU 和硬件加速器的组合来支持延迟关键性的执行和高吞吐量并行计算。其中包括电信、科学计算、机器学习、金融服务和数据分析等领域的工作负载。这种混合系统构成高性能环境。 + +为了获得最佳性能,需要实现 CPU 隔离、NUMA-locality 相关的优化。 + +### 目标 + +1. 改进 Koordinator QoS 的 CPU 编排定义。 +1. 明确兼容 kubelet CPU Manager Policy的策略。 +1. 阐明 koordlet 应如何增强 CPU 调度机制。 +1. 为应用和集群管理员提供一套 API 支持复杂的CPU编排场景,例如 CPU 绑定策略、CPU 独占策略、NUMA 拓扑对齐策略和NUMA 拓扑信息等 +1. 提供优化 CPU 编排的 API。 + +### 非目标/未来工作 + +1. 描述 koordlet/koordlet-runtime-proxy 的具体设计细节。 +1. 
描述 CPU 重调度机制的具体设计细节。 + + +## 设计概述 + +![image](/img/cpu-orchestration-seq-uml.svg) + +当 koordlet 启动时,koordlet 从 kubelet 收集 NUMA 拓扑信息,包括 NUMA 拓扑、CPU 拓扑、kubelet CPU 管理策略、kubelet 为 Guaranteed Pod 分配的 CPU 等,并更新到节点资源拓扑 CRD。当延迟敏感的应用程序扩容时,可以为新Pod设置 Koordinator QoS LSE/LSR、CPU绑定策略和 CPU独占策略,要求 koord-scheduler 分配最适合的 CPU 以获得最佳性能。当 koord-scheduler 调度 Pod 时,koord-scheduler 会过滤满足 NUMA 拓扑对齐策略的节点,并通过评分选择最佳节点,在 Reserve 阶段分配 CPU,并在 PreBinding 时将结果记录到 Pod Annotation。koordlet 通过 Hook kubelet CRI 请求,替换通过 koord-scheduler 调度的 CPU 配置参数到运行时,例如配置 cgroup。 + +## 用户故事 + +### 故事 1 + +兼容 kubelet 现有的 CPU 管理策略。CPU 管理器 `static` 策略允许具有某些资源特征的 Pod 在节点中被授予更高的 CPU 亲和性和排他性。如果启用 `static` 策略,集群管理员必须配置 kubelet 保留一些 CPU。 `static` 策略有一些选项,如果指定了 full-pcpus-only(beta, 默认可见) 策略选项,则 `static` 策略将始终分配完整的物理内核。如果指定了 distribute-cpus-across-numa(alpha, 默认不可见) 选项,在需要多个 NUMA 节点来满足分配的情况下, `static` 策略将在 NUMA 节点之间平均分配 CPU。 + +### 故事 2 + +同样,应该兼容社区中现有的 K8s Guaranteed Pod 的语义。静态策略分配给 K8s Guaranteed Pod 的 CPU 不会共享给默认的 BestEffort Pod,所以相当于 LSE。但是当节点的负载比较低时,LSR Pod 分配的 CPU 应该与 BestEffort 的工作负载共享,以获得经济效益。 + +### 故事 3 + +拓扑管理器是一个 kubelet 组件,旨在协调负责这些优化的组件集。引入拓扑管理器后,在工作节点具有不同的 NUMA 拓扑,并且该拓扑中具有不同资源量的集群中启动 Pod 的问题成为现实。Pod 可以调度在资源总量足够的节点上,但是资源分布不能满足合适的拓扑策略。 + +### 故事 4 + +调度器支持协调编排多个延迟敏感的应用程序。例如,支持延迟敏感的应用程序多个实例在 CPU 维度上互斥,并且延迟敏感的应用和一般应用在 CPU 维度亲和。这样可以降低成本并保证运行质量。 + +### 故事 5 + +在基于 NUMA 拓扑分配 CPU 时,用户希望有不同的分配策略。例如 bin-packing 优先,或者分配最空闲的 NUMA 节点。 + +### 故事 6 + +随着应用程序的伸缩或滚动,最适合的可分配空间会逐渐变得碎片化,这会导致一些策略的分配效果不好,影响应用程序的运行时效果。 + +## 设计细节 + +### CPU 编排基本原则 + +1. 仅支持 Pod 维度的 CPU 分配机制。 +1. Koordinator 将机器上的 CPU 分为 `CPU Shared Pool`,`statically exclusive CPUs` 和 `BE CPU Shared Pool`。 + 1. `CPU Shared Pool` 是一组共享 CPU 池,K8s Burstable 和 Koordinator LS Pod 中的任何容器都可以在其上运行。K8s Guaranteed `fractional CPU requests` 的 Pod 也可以运行在 `CPU Shared Pool` 中。`CPU Shared Pool` 包含节点中所有未分配的 CPU,但不包括由 K8s Guaranteed、LSE 和 LSR Pod 分配的 CPU。如果 kubelet 保留 CPU,则 `CPU Shared Pool` 包括保留的 CPU。 + 1. `statically exclusive CPUs` 是指分配给 K8s Guaranteed、Koordinator LSE/LSR Pods 使用的一组独占 CPU。当 K8s Guaranteed、LSE 和 LSR Pods 谁申请 CPU 时,koord-scheduler 将从 `CPU Shared Pool` 中分配。 + 1. `BE CPU Shared pool` 是一组 `K8s BestEffort` 和 `Koordinator BE` 的 Pod 都可运行的 CPU 池。`BE CPU Shared pool` 包含节点中除 K8s Guaranteed 和 Koordinator LSE Pod 分配的之外的所有 CPU。 + +### Koordinator QoS CPU 编排原则 + +1. LSE/LSR Pod 的 Request 和 Limit 必须相等,CPU 值必须是 1000 的整数倍。 +1. LSE Pod 分配的 CPU 是完全独占的,不得共享。如果节点是超线程架构,只保证逻辑核心维度是隔离的,但是可以通过 `CPUBindPolicyFullPCPUs` 策略获得更好的隔离。 +1. LSR Pod 分配的 CPU 只能与 BE Pod 共享。 +1. LS Pod 绑定了与 LSE/LSR Pod 独占之外的共享 CPU 池。 +1. BE Pod 绑定使用节点中除 LSE Pod 独占之外的所有 CPU 。 +1. 如果 kubelet 的 CPU 管理器策略为 static 策略,则已经运行的 K8s Guaranteed Pods 等价于 Koordinator LSR。 +1. 如果 kubelet 的 CPU 管理器策略为 none 策略,则已经运行的 K8s Guaranteed Pods 等价于 Koordinator LS。 +1. 新创建但未指定 Koordinator QoS 的 K8s Guaranteed Pod 等价于 Koordinator LS。 + +![img](/img/qos-cpu-orchestration.png) + +### kubelet CPU Manager Policy 兼容原则 + +1. 如果 kubelet 设置 CPU 管理器策略选项 `full-pcpus-only=true/distribute-cpus-across-numa=true`,并且节点中没有 Koordinator 定义的新 CPU 绑定策略,则遵循 kubelet 定义的这些参数的定义。 +1. 如果 kubelet 设置了拓扑管理器策略,并且节点中没有 Koordinator 定义的新的 NUMA Topology Alignment 策略,则遵循 kubelet 定义的这些参数的定义。 + +### 接管 kubelet CPU 管理策略 + +kubelet 预留的 CPU 主要服务于 K8s BestEffort 和 Burstable Pods。但 Koordinator 不会遵守该策略。K8s Burstable Pods 应该使用 `CPU Shared Pool`,而 K8s BestEffort Pods 应该使用 `BE CPU Shared Pool`。Koordinator LSE 和 LSR Pod 不会从被 kubelet 预留的 CPU 中分配。 + + +1. 对于 K8s Burstable 和 Koordinator LS Pod: + 1. 
当 koordlet 启动时,计算 `CPU Shared Pool` 并将共享池应用到节点中的所有 Burstable 和 LS Pod,即更新它们的 cpu cgroups, 设置 cpuset。在创建或销毁 LSE/LSR Pod 时执行相同的逻辑。 + 1. koordlet 会忽略 kubelet 预留的 CPU,将其替换为 Koordinator 定义的 `CPU Shared Pool`。 +1. 对于 K8s BestEffort 和 Koordinator BE Pod: + 1. 如果 kubelet 预留了 CPU,BestEffort Pod 会首先使用预留的 CPU。 + 1. koordlet 可以使用节点中的所有 CPU,但不包括由具有整数 CPU 的 K8s Guaranteed 和 Koordinator LSE Pod 分配的 CPU。这意味着如果 koordlet 启用 CPU Suppress 功能,则应遵循约束以保证不会影响 LSE Pod。同样,如果 kubelet 启用了静态 CPU 管理器策略,则也应排除 K8s Guaranteed Pod。 +1. 对于 K8s Guaranteed Pod: + 1. 如果 Pod 的 annotations 中有 koord-scheduler 更新的 `scheduling.koordinator.sh/resource-status`,在 Sandbox/Container 创建阶段,则会替换 kubelet CRI 请求中的 CPUSet。 + 1. kubelet 有时会调用 CRI 中定义的 Update 方法来更新容器 cgroup 以设置新的 CPU,因此 koordlet 和 koord-runtime-proxy 需要 Hook 该方法。 +1. 自动调整 `CPU Shared Pool` 大小 + 1. koordlet 会根据 Pod 创建/销毁等变化自动调整 `CPU Shared Pool` 的大小。如果 `CPU Shared Pool` 发生变化,koordlet 应该更新所有使用共享池的 LS/K8s Burstable Pod 的 cgroups。 + 1. 如果 Pod 的 annotations`scheduling.koordinator.sh/resource-status` 中指定了对应的 `CPU Shared Pool`,koordlet 在配置 cgroup 时只需要绑定对应共享池的 CPU 即可。 + +接管逻辑要求 koord-runtime-proxy 添加新的扩展点并且 koordlet 实现新的运行时插件的 Hook 。当没有安装 koord-runtime-proxy 时,这些接管逻辑也将能够实现。 + +## CPU 编排 API + +### 应用程序 CPU 编排 API + +#### Resource Spec + +Annotation `scheduling.koordinator.sh/resource-spec` 是 Koordinator 定义的资源分配 API。用户通过设置 annotation 来指定所需的 CPU 编排策略。未来,我们还可以根据需要扩展和添加需要支持的资源类型。Annotation Value 对应的定义如下: + +```go +// ResourceSpec describes extra attributes of the compute resource requirements. +type ResourceSpec struct { + PreferredCPUBindPolicy CPUBindPolicy `json:"preferredCPUBindPolicy,omitempty"` + PreferredCPUExclusivePolicy CPUExclusivePolicy `json:"preferredCPUExclusivePolicy,omitempty"` +} + +type CPUBindPolicy string + +const ( + // CPUBindPolicyDefault performs the default bind policy that specified in koord-scheduler configuration + CPUBindPolicyDefault CPUBindPolicy = "Default" + // CPUBindPolicyFullPCPUs favor cpuset allocation that pack in few physical cores + CPUBindPolicyFullPCPUs CPUBindPolicy = "FullPCPUs" + // CPUBindPolicySpreadByPCPUs favor cpuset allocation that evenly allocate logical cpus across physical cores + CPUBindPolicySpreadByPCPUs CPUBindPolicy = "SpreadByPCPUs" + // CPUBindPolicyConstrainedBurst constrains the CPU Shared Pool range of the Burstable Pod + CPUBindPolicyConstrainedBurst CPUBindPolicy = "ConstrainedBurst" +) + +type CPUExclusivePolicy string + +const ( + // CPUExclusivePolicyDefault performs the default exclusive policy that specified in koord-scheduler configuration + CPUExclusivePolicyDefault CPUExclusivePolicy = "Default" + // CPUExclusivePolicyPCPULevel represents mutual exclusion in the physical core dimension + CPUExclusivePolicyPCPULevel CPUExclusivePolicy = "PCPULevel" + // CPUExclusivePolicyNUMANodeLevel indicates mutual exclusion in the NUMA topology dimension + CPUExclusivePolicyNUMANodeLevel CPUExclusivePolicy = "NUMANodeLevel" +) +``` + +- `CPUBindPolicy` 定义CPU绑定策略。具体取值定义如下: + - `CPUBindPolicyDefault` 或空值不执行任何绑定策略。它完全由调度器插件配置决定。 + - `CPUBindPolicyFullPCPUs` 是一种 bin-packing 策略,类似于 kubelet 定义的 `full-pcpus-only=true` 选项,用于分配完整的物理内核。但是,如果节点中剩余的逻辑 CPU 数量足够,但完整的物理核心数量不足,则继续分配。该策略可以有效避免扰邻(noisy neighbor)问题。 + - `CPUBindPolicySpreadByPCPUs` 是一种打散(Spread)策略。如果节点启用了超线程,当采用该策略时,调度器将在物理内核之间均匀的分配逻辑 CPU。例如,当前节点有 8 个物理内核和 16 个逻辑 CPU。当一个 Pod 需要 8 个逻辑 CPU 并且采用 `CPUBindPolicySpreadByPCPUs` 策略时,调度器会从每个物理核中分配一个逻辑 CPU。该策略主要用于一些具有多种不同峰谷特性的延迟敏感型应用程序。它不仅可以让应用程序在特定时间充分使用 CPU,而且不会被同一物理内核上的应用程序所干扰。所以在使用这个策略时可能会出现扰邻(noisy neighbor)问题。 + - 
`CPUBindPolicyConstrainedBurst` 主要帮助 K8s Burstable/Koordinator LS Pod 获得更好性能的特殊策略。使用该策略时,koord-scheduler 会根据 Pod 限制过滤掉具有合适 CPU 共享池的 NUMA 节点的节点。调度成功后,调度器会更新 Pod 中的 `scheduling.koordinator.sh/resource-status`,声明要绑定的 `CPU Shared Pool`。koordlet 根据 `CPU Shared Pool` 绑定对应 NUMA Node 的 `CPU Shared Pool`。 + - 如果 `NodeResourceTopology` 中的 `kubelet.koartiator.sh/cpu-manager-policy` 选项为 `full-pcpus-only=true`,或者 Node 中的 `node.koordator.sh/cpubind-policy` 的值为 `FullPCPUsOnly`,则 koord-scheduler 会检查实例的 CPU 请求数是否满足 SMT 对齐要求,以避免调度后被 kubelet 拒绝。如果 Pod 使用 `CPUBindPolicySpreadByPCPUs` 策略或映射到物理核心数的逻辑 CPU 数量不是整数,koord-scheduler 将避免调度此类节点。 +- `CPUExclusivePolicy` 定义了 CPU 独占策略,它可以帮助解决扰邻(noisy neighbor)问题。具体值定义如下 + - `CPUExclusivePolicyDefault` 或空值不执行任何隔离策略。它完全由调度器插件配置决定。 + - `CPUExclusivePolicyPCPULevel` 在分配逻辑CPU时,尽量避开已经被同一个独占策略申请的物理核。它是对 `CPUBindPolicySpreadByPCPUs` 策略的补充。 + - `CPUExclusivePolicyNUMANodeLevel` 在分配逻辑 CPU 时,尽量避免 NUMA 节点已经被相同的独占策略申请。如果没有满足策略的 NUMA 节点,则降级为 `CPUExclusivePolicyPCPULevel` 策略。 + +对于ARM架构,`CPUBindPolicy` 只支持 `CPUBindPolicyFullPCPUs` ,`CPUExclusivePolicy` 只支持 `CPUExclusivePolicyNUMANodeLevel` 。 + +#### Resource Status + +Annotation `scheduling.koordinator.sh/resource-status` 表示资源分配结果。 koord-scheduler 在绑定 Pod 到节点之前修改 annotation。 koordlet 使用结果来配置 cgroup。 + +Annotation value 对应的定义如下: + +```go +type ResourceStatus struct { + CPUSet string `json:"cpuset,omitempty"` + CPUSharedPools []CPUSharedPool `json:"cpuSharedPools,omitempty"` +} +``` + +- `CPUSet` 表示分配的 CPU。当 LSE/LSR Pod 请求时,koord-scheduler 将更新该字段。它是 Linux CPU 列表格式的字符串。更多详细信息,[请参阅文档](http://man7.org/linux/man-pages/man7/cpuset.7.html#FORMATS) 。 +- `CPUSharedPools` 表示 LS Pod 使用的所需 CPU 共享池。如果节点的标签 `node.koordinator.sh/numa-topology-alignment-policy` 带有 `Restricted/SingleNUMANode`,koord-scheduler 将为 LS Pod 找到最适合的 NUMA 节点,并更新需要 koordlet 使用指定 `CPU Shared Pool` 的字段。需要注意的是,调度器不会更新 `CPU Shared Pool` 中的 CPUSet 字段,koordlet 根据 `CPU Shared Pool` 中的 `Socket` 和 `Node` 字段绑定对应 NUMA 节点的 `CPU Shared Pool`。 + +#### 例子 + +具体例子: + +```yaml +apiVersion: v1 +kind: Pod +metadata: + annotations: + scheduling.koordinator.sh/resource-spec: |- + { + "preferredCPUBindPolicy": "SpreadByPCPUs", + "preferredCPUExclusivePolicy": "PCPULevel" + } + scheduling.koordinator.sh/resource-status: |- + { + "cpuset": "0-3" + } + name: test-pod + namespace: default +spec: + ... 
+``` + +### 节点 CPU 编排 API + +从集群管理员的角度来看,需要提供一些 API 来控制节点的 CPU 编排行为。 + +#### CPU 绑定策略 + +标签 `node.koordinator.sh/cpu-bind-policy` 限制了调度时如何绑定 CPU、逻辑 CPU。 + +具体的取值定义: +- `None` 或空值不执行任何策略 +- `FullPCPUsOnly` 要求调度器必须分配完整的物理内核。等效于 kubelet CPU 管理器策略选项 `full-pcpus-only=true`。 +- `SpreadByPCPUs` 要求调度器必须按照物理核维度均匀的分配CPU。 + +如果 Node 的 Label 中没有 `node.koordinator.sh/cpu-bind-policy`,则按照 Pod 或 koord-scheduler 配置的策略执行。 + +#### NUMA 分配策略 + +标签 `node.koordinator.sh/numa-allocate-strategy` 表示在调度时如何选择满意的 NUMA 节点。下面是具体的值定义: +- `MostAllocated` 表示从可用资源最少的 NUMA 节点分配。 +- `LeastAllocated` 表示从可用资源最多的 NUMA 节点分配。 +- `DistributeEvenly` 表示在 NUMA 节点上平均分配 CPU。 + +如果集群管理员没有在Node上设置标签 `node.koordinator.sh/numa-allocate-strategy`,但是 `NodeResourceTopology` 中的 `kubelet.koordinator.sh/cpu-manager-policy` 有选项 `distribute-cpus-across-numa=true`,然后按照 `distribute-cpus-across-numa` 的定义分配。 + +如果节点的标签中没有 `node.koordinator.sh/numa-allocate-strategy` 并且 `NodeResourceTopology` 中没有带有 `Distribute-cpus-across-numa` 选项的 `kubelet.koordinator.sh/cpu-manager-policy`,它将根据 koord-scheduler 配置的策略执行。 + +如果同时定义了 `node.koordinator.sh/numa-allocate-strategy` 和 `kubelet.koordinator.sh/cpu-manager-policy`,则首先使用 `node.koordinator.sh/numa-allocate-strategy`。 + +#### NUMA 拓扑对齐策略 + +标签 `node.koordinator.sh/numa-topology-alignment-policy` 表示如何根据 NUMA 拓扑对齐资源分配。策略语义遵循 K8s 社区。相当于 `NodeResourceTopology` 中的 `TopologyPolicies` 字段,拓扑策略 `SingleNUMANodePodLevel` 和 `SingleNUMANodeContainerLevel` 映射到 `SingleNUMANode` 策略。 + +- `None` 是默认策略,不执行任何拓扑对齐。 +- `BestEffort` 表示优先选择拓扑对齐的 NUMA Node,如果没有,则继续为 Pods 分配资源。 +- `Restricted` 表示每个 Pod 在 NUMA 节点上请求的资源是拓扑对齐的,如果不是,koord-scheduler 会在调度时跳过该节点。 +- `SingleNUMANode` 表示一个 Pod 请求的所有资源都必须在同一个 NUMA 节点上,如果不是,koord-scheduler 调度时会跳过该节点。 + +如果节点的 Label 中没有 `node.koordinator.sh/numa-topology-alignment-policy`,并且 `NodeResourceTopology中的TopologyPolicies=None`,则按照 koord-scheduler 配置的策略执行。 + +如果同时定义了 Node 中的 `node.koordinator.sh/numa-topology-alignment-policy` 和 `NodeResourceTopology` 中的 `TopologyPolicies=None`,则首先使用 `node.koordinator.sh/numa-topology-alignment-policy`。 + +#### 例子 + +具体例子: + +```yaml +apiVersion: v1 +kind: Node +metadata: + labels: + node.koordinator.sh/cpu-bind-policy: "FullPCPUsOnly" + node.koordinator.sh/numa-topology-alignment-policy: "BestEffort" + node.koordinator.sh/numa-allocate-strategy: "MostAllocated" + name: node-0 +spec: + ... 
+``` + +### 节点资源拓扑 CRD + +需要上报的节点资源信息主要包括以下几类: + +- NUMA Topology,包括资源信息、CPU 信息如逻辑 CPU ID、物理 Core ID、NUMA Socket ID 和 NUMA Node ID 等。 +- kubelet 配置的拓扑管理器范围和策略。 +- kubelet 配置的 CPU 管理器策略和选项。 +- 由 kubelet 或 koord-scheduler 分配的 Pod 绑定 CPU,包括 K8s Guaranteed Pod、Koordinator LSE/LSR Pod,但 LS/BE 除外。 +- kubelet 定义的 `CPU Shared Pool`。 + +以上信息可以指导 koord-scheduler 更好地兼容 kubelet 的 CPU 管理逻辑,做出更合适的调度决策,帮助用户快速排查问题。 + +#### CRD 字段定义 + +我们使用 [NodeResourceTopology](https://github.com/k8stopologyawareschedwg/noderesourcetopology-api/blob/master/pkg/apis/topology/v1alpha1/types.go) CRD 来描述 NUMA 拓扑。社区定义的 NodeResourceTopology CRD 主要用于以下考虑: + +- NodeResourceTopology 已经包含了基本的 NUMA 拓扑信息和 kubelet TopologyManager 的 Scope 和 Policies 信息。我们可以重用现有的代码。 +- 跟上社区的发展,影响社区做出更多的改变。 + +#### 兼容 + +koordlet 周期性的创建或者更新 NodeResourceTopology 实例。NodeResourceTopology 实例名与节点名保持一致。并通过添加标签 `app.kubernetes.io/managed-by=Koordinator` 描述节点由 Koordinator 管理。 + +#### 扩展 + +目前 `NodeResourceTopology` 缺少一些信息,暂时以 annotation 或 label 的形式写在 `NodeResourceTopology` 中: + +- Annotation `kubelet.koordinator.sh/cpu-manger-policy` 描述了 kubelet CPU 管理器策略和选项。方案定义如下 + +```go +const ( + FullPCPUsOnlyOption string = "full-pcpus-only" + DistributeCPUsAcrossNUMAOption string = "distribute-cpus-across-numa" +) + +type kubeletCPUManagerPolicy struct { + Policy string `json:"policy,omitempty"` + Options map[string]string `json:"options,omitempty"` +} + +``` + +- Annotation `node.koordinator.sh/cpu-topology` 描述了详细的 CPU 拓扑。精细化的管理机制需要更详细的 CPU 拓扑信息。该方案定义如下: + +```go +type CPUTopology struct { + Detail []CPUInfo `json:"detail,omitempty"` +} + +type CPUInfo struct { + ID int32 `json:"id"` + Core int32 `json:"core"` + Socket int32 `json:"socket"` + Node int32 `json:"node"` +} +``` + +- Annotation `node.koordinator.sh/pod-cpu-allocs` 描述了 Koordinator LSE/LSR 和 K8s Guaranteed Pods 分配的 CPU。Annotation Value 定义如下: + +```go +type PodCPUAlloc struct { + Namespace string `json:"namespace,omitempty"` + Name string `json:"name,omitempty"` + UID types.UID `json:"uid,omitempty"` + CPUSet string `json:"cpuset,omitempty"` + ManagedByKubelet bool `json:"managedByKubelet,omitempty"` +} + +type PodCPUAllocs []PodCPUAlloc +``` + +- Annotation `node.koordinator.sh/cpu-shared-pools` 描述了 Koordinator 定义的 CPU 共享池。共享池主要由 Koordinator LS Pods 或 K8s Burstable Pods 使用。该方案定义如下: + +```go +type NUMACPUSharedPools []CPUSharedPool + +type CPUSharedPool struct { + Socket int32 `json:"socket"` + Node int32 `json:"node"` + CPUSet string `json:"cpuset,omitempty"` +} +``` +`CPUSet` 字段是 Linux CPU 列表格式的字符串。更多详细信息,[请参阅文档](http://man7.org/linux/man-pages/man7/cpuset.7.html#FORMATS) 。 + + +#### 创建/更新 NodeResourceTopology + +- koordlet 负责创建/更新 `NodeResourceTopology` +- 建议 koordlet 通过解析 CPU 状态检查点文件来获取现有 K8s Guaranteed Pod 的 CPU 分配信息。或者通过 kubelet 提供的 CRI 接口和 gRPC 获取这些信息。 +- 当 koord-scheduler 分配 Pod 的 CPU 时,替换 kubelet 状态检查点文件中的 CPU。 +- 建议 koordlet 从 [kubeletConfiguration](https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/) 获取 CPU 管理器策略和选项。 + +#### 例子 + +完整的 `NodeResourceTopology` 示例: + +```yaml +apiVersion: topology.node.k8s.io/v1alpha1 +kind: NodeResourceTopology +metadata: + annotations: + kubelet.koordinator.sh/cpu-manager-policy: |- + { + "policy": "static", + "options": { + "full-pcpus-only": "true", + "distribute-cpus-across-numa": "true" + } + } + node.koordinator.sh/cpu-topology: |- + { + "detail": [ + { + "id": 0, + "core": 0, + "socket": 0, + "node": 0 + }, + { + "id": 1, + "core": 1, + "socket": 1, + "node": 1 + } + ] + } + node.koordinator.sh/cpu-shared-pools: |- + [ + { + "socket": 0, + 
"node": 0, + "cpuset": "0-3" + } + ] + node.koordinator.sh/pod-cpu-allocs: |- + [ + { + "namespace": "default", + "name": "static-guaranteed-pod", + "uid": "32b14702-2efe-4be9-a9da-f3b779175846", + "cpu": "4-8", + "managedByKubelet": "true" + } + ] + labels: + app.kubernetes.io/managed-by: Koordinator + name: node1 +topologyPolicies: ["SingleNUMANodePodLevel"] +zones: + - name: node-0 + type: Node + resources: + - name: cpu + capacity: 20 + allocatable: 15 + available: 10 + - name: vendor/nic1 + capacity: 3 + allocatable: 3 + available: 3 + - name: node-1 + type: Node + resources: + - name: cpu + capacity: 30 + allocatable: 25 + available: 15 + - name: vendor/nic2 + capacity: 6 + allocatable: 6 + available: 6 + - name: node-2 + type: Node + resources: + - name: cpu + capacity: 30 + allocatable: 25 + available: 15 + - name: vendor/nic1 + capacity: 3 + allocatable: 3 + available: 3 + - name: node-3 + type: Node + resources: + - name: cpu + capacity: 30 + allocatable: 25 + available: 15 + - name: vendor/nic1 + capacity: 3 + allocatable: 3 + available: 3 +``` diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/fine-grained-device-scheduling.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/fine-grained-device-scheduling.md new file mode 100644 index 000000000..e27e8a951 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/fine-grained-device-scheduling.md @@ -0,0 +1,408 @@ +# Fine-grained device scheduling + +## Summary + +This proposal provides a fine-grained mechanism for managing GPUs and other devices such as RDMA and FPGA, defines a set of APIs to describe device information on nodes, including GPU, RDMA, and FPGA, and a new set of resource names to flexibly support users to apply at a finer granularity GPU resources. This mechanism is the basis for subsequent other GPU scheduling capabilities such as GPU Share, GPU Overcommitment, etc. + +## Motivation + +GPU devices have very strong computing power, but are expensive. How to make better use of GPU equipment, give full play to the value of GPU and reduce costs is a problem that needs to be solved. In the existing GPU allocation mechanism of the K8s community, the GPU is allocated by the kubelet, and it is a complete device allocation. This method is simple and reliable, but similar to the CPU and memory, the GPU will also be wasted. Therefore, some users expect to use only a portion of the GPU's resources and share the rest with other workloads to save costs. Moreover, GPU has particularities. For example, the NVLink and oversold scenarios supported by NVIDIA GPU mentioned below both require a central decision through the scheduler to obtain globally optimal allocation results. + +![image](/img/nvlink.jpg) + +From the picture, we can find that although the node has 8 GPU instances whose model is A100/V100, the data transmission speed between GPU instances is different. When a Pod requires multiple GPU instances, we can assign the Pod the GPU instances with the maximum data transfer speed combined relationship. In addition, when we want the GPU instances among a group of Pods to have the maximum data transfer speed combined relationship, the scheduler should batch allocate the best GPU instances to these Pods and assign them to the same node. + +### Goals + +1. Definition Device CRD and the Resource API. +1. Provides a reporter component in koordlet to report Device information and resource capacities. +1. 
Provides a scheduler plugin to support users to apply at a finer granularity GPU resources. +1. Provider a new runtime hook plugin in koordlet to support update the environments of containers with GPUs that be allocated by scheduler. + +### Non-goals/Future work + +1. Define flexible allocation strategies, such as implementing BinPacking or Spread according to GPU resources + +## Proposal + +### API + +#### Device resource dimensions + +Due to GPU is complicated, we will introduce GPU first. As we all know there is compute and GPU Memory capability for the GPU device. Generally user apply GPU like "I want 1/2/4/8 GPUs", but if node support GPU level isolation mechanism, user may apply GPU like "I want 0.5/0.25 GPU resources". Moreover, user may set different compute capability and GPU memory capability for best resource utilization, so the user want apply GPU like "I want X percent of "compute capability and Y percent of memory capability". + +We abstract GPU resources into different dimensions: + +- `kubernetes.io/gpu-core` represents the computing capacity of the GPU. Similar to K8s MilliCPU, we abstract the total computing power of GPU into one hundred, and users can apply for the corresponding amount of GPU computing power according to their needs. +- `kubernetes.io/gpu-memory` represents the memory capacity of the GPU in bytes. +- `kubernetes.io/gpu-memory-ratio` represents the percentage of the GPU's memory. + +Assuming that node A has 4 GPU instances, and the total memory of each instance is 8GB, when device reporter reports GPU capacity information to `Node.Status.Allocatable`, it no longer reports nvidia.com/gpu=4, but reports the following information: + +```yaml +status: + capacity: + kubernetes.io/gpu-core: 400 + kubernetes.io/gpu-memory: "32GB" + kubernetes.io/gpu-memory-ratio: 400 + allocatable: + kubernetes.io/gpu-core: 400 + kubernetes.io/gpu-memory: "32GB" + kubernetes.io/gpu-memory-ratio: 400 +``` + +For the convenience of users, an independent resource name `kubernetes.io/gpu` is defined. For example, when a user wants to use half of the computing resources and memory resources of a GPU instance, the user can directly declare `kubernetes.io/gpu: 50`, and the scheduler will convert it to `kubernetes.io/gpu-core: 50, kubernetes.io/gpu-memory-ratio: 50` + +For other devices like RDMA and FPGA, the node has 1 RDMA and 1 FGPA, will report the following information: + +```yaml +status: + capacity: + kubernetes.io/rdma: 100 + kubernetes.io/fpga: 100 + allocatable: + kubernetes.io/rdma: 100 + kubernetes.io/fpga: 100 +``` + +Why do we need `kubernetes.io/gpu-memory-ratio` and `kubernetes.io/gpu-memory` ? +When user apply 0.5/0.25 GPU, the user don't know the exact memory total bytes per GPU, only wants to use +half or quarter percentage of memory, so user can request the GPU memory with `kubernetes.io/gpu-memory-ratio`. +When scheduler assigned Pod on concrete node, scheduler will translate the `kubernetes.io/gpu-memory-ratio` to `kubernetes.io/gpu-memory` by the formulas: ***allocatedMemory = totalMemoryOf(GPU) * `kubernetes.io/gpu-memory-ratio`***, so that the GPU isolation can work. + +During the scheduling filter phase, the scheduler will do special processing for `kubernetes.io/gpu-memory` and `kubernetes.io/gpu-memory-ratio`. When a Pod specifies `kubernetes.io/gpu-memory-ratio`, the scheduler checks each GPU instance on each node for unallocated or remaining resources to ensure that the remaining memory on each GPU instance meets the ratio requirement. 
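+
+For illustration, a minimal sketch of the ratio-to-bytes translation described above (the helper name `translateMemoryRatio` is an assumption for this example, not the actual scheduler code; `resource` refers to the standard `k8s.io/apimachinery/pkg/api/resource` package):
+
+```go
+// translateMemoryRatio converts a Pod's kubernetes.io/gpu-memory-ratio request
+// into a concrete kubernetes.io/gpu-memory quantity for one GPU instance,
+// e.g. a 16Gi GPU with ratio 50 yields 8Gi of gpu-memory.
+func translateMemoryRatio(totalGPUMemory resource.Quantity, memoryRatio int64) resource.Quantity {
+	allocatedBytes := totalGPUMemory.Value() * memoryRatio / 100
+	return *resource.NewQuantity(allocatedBytes, resource.BinarySI)
+}
+```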
+ +If the user knows exactly or can roughly estimate the specific memory consumption of the workload, he can apply for GPU memory through `kubernetes.io/gpu-memory`. All details can be seen below. + +Besides, when dimension's value > 100, means Pod need multi-devices. now only allow the value can be divided by 100. + +#### User apply device resources scenarios + +##### Compatible with `nvidia.com/gpu` + +```yaml +resources: + requests: + nvidia.com/gpu: "2" + cpu: "4" + memory: "8Gi" +``` + +The scheduler translates the `nvida.com/gpu: 2` to the following spec: + +```yaml +resources: + requests: + kubernetes.io/gpu-core: "200" + kubernetes.io/gpu-memory-ratio: "200" + kubernetes.io/gpu-memory: "16Gi" # assume 8G memory in bytes per GPU + cpu: "4" + memory: "8Gi" +``` + +##### Apply whole resources of GPU or part resources of GPU + +```yaml +resources: + requests: + kubernetes.io/gpu: "50" + cpu: "4" + memory: "8Gi" +``` + +The scheduler translates the `kubernetes.io/gpu: "50"` to the following spec: + +```yaml +resources: + requests: + kubernetes.io/gpu-core: "50" + kubernetes.io/gpu-memory-ratio: "50" + kubernetes.io/gpu-memory: "4Gi" # assume 8G memory in bytes for the GPU + cpu: "4" + memory: "8Gi" +``` + +##### Apply `kubernetes.io/gpu-core` and `kubernetes.io/gpu-memory-ratio` separately + +```yaml +resources: + requests: + kubernetes.io/gpu-core: "50" + kubernetes.io/gpu-memory-ratio: "60" + cpu: "4" + memory: "8Gi" +``` + +##### Apply `kubernetes.io/gpu-core` and `kubernetes.io/gpu-memory` separately + +```yaml +resources: + requests: + kubernetes.io/gpu-core: "60" + kubernetes.io/gpu-memory: "4Gi" + cpu: "4" + memory: "8Gi" +``` + +##### Apply RDMA + +```yaml +resources: + requests: + kubernetes.io/rdma: "100" + cpu: "4" + memory: "8Gi" +``` + +### Implementation Details + +#### Scheduling + +1. Abstract new data structure to describe resources and healthy status per device on the node. +2. Implements the Filter/Reserve/PreBind extenstion points. +3. Automatically recognize different kind devices. When a new device added, we don't need modify any code + +##### DeviceAllocation + +In the PreBind stage, the scheduler will update the device (including GPU) allocation results, including the device's Minor and resource allocation information, to the Pod in the form of annotations. + +```go +/* +{ + "gpu": [ + { + "minor": 0, + "resouurces": { + "kubernetes.io/gpu-core": 100, + "kubernetes.io/gpu-mem-ratio": 100, + "kubernetes.io/gpu-mem": "16Gi" + } + }, + { + "minor": 1, + "resouurces": { + "kubernetes.io/gpu-core": 100, + "kubernetes.io/gpu-mem-ratio": 100, + "kubernetes.io/gpu-mem": "16Gi" + } + } + ] +} +*/ +type DeviceAllocation struct { + Minor int32 + Resources map[string]resource.Quantity +} + +type DeviceAllocations map[DeviceType][]*DeviceAllocation +``` + +##### NodeDevicePlugin + +```go +var ( + _ framework.PreFilterPlugin = &NodeDevicePlugin{} + _ framework.FilterPlugin = &NodeDevicePlugin{} + _ framework.ReservePlugin = &NodeDevicePlugin{} + _ framework.PreBindPlugin = &NodeDevicePlugin{} +) + +type NodeDevicePlugin struct { + frameworkHandler framework.Handle + nodeDeviceCache *NodeDeviceCache +} + +type NodeDeviceCache struct { + lock sync.Mutex + nodeDevices map[string]*nodeDevice +} + +type nodeDevice struct { + lock sync.Mutex + DeviceTotal map[DeviceType]deviceResource + DeviceFree map[DeviceType]deviceResource + DeviceUsed map[DeviceType]deviceResource + AllocateSet map[DeviceType]*corev1.PodList +} + +// We use `deviceResource` to present resources per device. 
+// "0": {kubernetes.io/gpu-core:100, kubernetes.io/gpu-memory-ratio:100, kubernetes.io/gpu-memory: 16GB} +// "1": {kubernetes.io/gpu-core:100, kubernetes.io/gpu-memory-ratio:100, kubernetes.io/gpu-memory: 16GB} +type deviceResources map[int]corev1.ResourceList + +``` + +We will register node and device event handler to maintain device account. + +- In Filter, we will make-up each device request by a node(the gpu-memory example), and try compare each device free resource and Pod device request. +- In Reserve/Unreserve, we will update nodeDeviceCache's used/free resource and allocateSet. Now device selection rule just based on device minor id order. +- In PreBind, we will write DeviceAllocations to Pod's annotation. +- In Init stage, we should list all Node/Device/Pods to recover device accounts. + +#### Device Reporter + +Implements a new component called `Device Reporter` in koordlet to create or update `Device` CRD instance with the resources information and healthy status per device including GPU, RDMA and FPGA, etc. This version we only support GPU. It will execution `nccl` commands to get each minor resource just like k8s-gpu-device-plugins. We will apply community health check logic. + +#### Device CRD Scheme definition +```go +type DeviceType string + +const ( + GPU DeviceType = "gpu" + FPGA DeviceType = "fpga" + RDMA DeviceType = "rdma" +) + +type DeviceSpec struct { + Devices []DeviceInfo `json:"devices"` +} + +type DeviceInfo struct { + // UUID represents the UUID of device + UUID string `json:"id,omitempty"` + // Minor represents the Minor number of Device, starting from 0 + Minor int32 `json:"minor,omitempty"` + // Type represents the type of device + Type DeviceType `json:"deviceType,omitempty"` + // Health indicates whether the device is normal + Health bool `json:"health,omitempty"` + // Resources represents the total capacity of various resources of the device + Resources map[string]resource.Quantity `json:"resource,omitempty"` +} + +type DeviceStatus struct {} + +type Device struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec DeviceSpec `json:"spec,omitempty"` + Status DeviceStatus `json:"status,omitempty"` +} + +type DeviceList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + + Items []Device `json:"items"` +} +``` + +##### Compatible + +Considering that some users already have many existing GPU Pods in their clusters, it is necessary to ensure that Koordinator GPU Scheduling does not repeatedly allocate the GPU devices held by these GPU Pods. Therefore, koord-scheduler needs to obtain the GPU devices's information held by these existing Pods. These GPU devices are allocated by the kubelet and recorded in the local file `/var/lib/kubelet/device-plugins/kubelet_internal_checkpoint`, so the device reporter will parse the file to obtain the GPU Device ID assigned to each Pod. When parsing, it needs to exclude the Pod that allocates GPU through koord-scheduler, and finally update it to Device CRD in the form of annotation. 
The corresponding annotation key is `node.koordinator.sh/devices-checkpoints`, and the annotation value is defined as follows: + +```go +type PodDevicesEntry struct { + PodUID string `json:"podUID,omitempty"` + ContainerName string `json:"containerName,omitempty"` + ResourceName string `json:"resourceName,omitempty"` + DeviceIDs []string `json:"deviceIDs,omitempty"` + AllocResp []byte `json:"allocResp,omitempty"` +} + +type PodDevicesEntries []PodDevicesEntry +``` + +#### CRD Example +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: Device +metadata: + name: node-1 + annotations: + node.koordinator.sh/gpu-checkpoints: |- + [ + { + "podUID": "fa8983dc-bb76-4eeb-8dcc-556fbd44d7ce", + "containerName": "cuda-container", + "resourceName": "nvidia.com/gpu", + "deviceIDs": ["GPU-36b27e44-b086-46f7-f2dc-73c36dc65991"] + } + ] +spec: + devices: + - health: true + id: GPU-98583a5c-c155-9cf6-f955-03c189d3dbfb + minor: 0 + resources: + kubernetes.io/gpu-core: "100" + kubernetes.io/gpu-memory: 15472384Ki + kubernetes.io/gpu-memory-ratio: "100" + type: gpu + - health: true + id: GPU-7f6410b9-bdf7-f9a5-de09-aa5ec31a7124 + minor: 1 + resources: + kubernetes.io/gpu-core: "100" + kubernetes.io/gpu-memory: 15472384Ki + kubernetes.io/gpu-memory-ratio: "100" + type: gpu +status: {} +``` + +#### koordlet and koord-runtime-proxy + +Our target is to work compatible with origin k8s kubelet and k8s device plugins, so: + +1. We still allow kubelet and device plugin to allocate concrete device, which means no matter there's a k8s device +plugin or not, our design can work well. + +2. In koord-runtime-proxy, we will use Pod's `DeviceAllocation` in annotation to replace the step1's result of container's +args and envs. + +We should modify protocol between koord-runtime-proxy and koordlet to add container env: + +```go +type ContainerResourceHookRequest struct { + .... + Env map[string]string +} + +type ContainerResourceHookResponse struct { + .... + Env map[string]string +} +``` + +Then we will add a new `gpu-hook` in koordlet's runtimehooks, registered to `PreCreateContainer` stage. +We will generate new GPU env `NVIDIA_VISIBLE_DEVICES` by Pod GPU allocation result in annotation. + +The koord-runtime-proxy can see these Pod's env, we need koord-runtime-proxy to pass these environments to koordlet, and koordlet parse the GPU related env to find the concrete device ids. + +Besides, the koordlet should report GPU model to node labels same as device plugin, this is in-case Koordinator working without device-plugin. + +Finally, we should modify `ContainerResourceExecutor`'s `UpdateRequest` function in koord-runtime-proxy, and let new GPU env covering old GPU env. + +When we handle hot-update processing, we can handle the existing scheduled Pods without device allocation in Pod's annotation. If GPU allocation info is not in annotation, we will find the GPU allocations from `ContainerResourceHookRequest`'s `Env`, and we will update all GPU allocations to Device CRD instance. + +### Compatibility + +As we know, the GPU scheduling in kube-scheduler side has no any different with other scalar resources. The concrete device-level assigning is done by kubelet and GPU device plugin, which will generate container's GPU env. + +Our design has no conflict with the above process. Our device reporter reports Koordinator GPU resources for kubelet +updating node resources. Then we schedule device request in our new plugin with new device resource account. 
In pre-bind +stage, we will update container resources with Koordinator GPU resources, this is for kubelet to check resource limitation. +We will also add device allocation information to Pod's annotation. In node side, the k8s device plugin will first patch +container env, but we will overwrite these envs in runtimeproxy by allocation result in Pod's annotation. + +### Upgrade strategy + +If using Koordinator GPU Scheduling to schedule GPU Pods in a brand new cluster, simply install Koordinator components. + +However, if you want to upgrade to Koordinator GPU Scheduing in an existing cluster, you need to avoid GPU devices being repeatedly allocated because of switching between different scheduling mechanisms. You need to pay attention to the order when upgrading: +1. Install the Koordinator components. In particular, make sure that the koordlets are all started successfully. +2. Stop the system or platform that creates the new GPU Pod. +3. Stop the scheduler currently responsible for the GPU Pod and ensure that there are no pending GPU Pods in the current cluster. +3. Wait a few minutes to ensure that each node's koordlet creates and updates the Device CRD. +4. Modify all components that create GPU Pods to switch the schedulerName of the Pod to koord-scheduler +5. Start trying to create a GPU Pod and verify the koord-scheduler GPU Scheduling scheduling result. +6. Restore the system or platform that created the GPU Pod and the old scheduler. + +In the future Koordinator will provide a webhook to solve the upgrade existing cluster problem. The webhook will identify the GPU Pod and modify the schedulerName of the newly created GPU Pod to koord-scheduler. At the same time, the webhook will take over the Binding operation of the GPU Pod. If the Binding is not initiated by koord-scheduler, it will be rejected. + +## Unsolved Problems + +## Alternatives + +1. User can choose whether use k8s-device plugin. as mentioned above, we can compatible in both cases. diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/gang-scheduling.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/gang-scheduling.md new file mode 100644 index 000000000..6122b6efc --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/gang-scheduling.md @@ -0,0 +1,347 @@ +# GangScheduling + +## 概览 +Koord-dscheduler 提供了 Gang Scheduling 满足 All-or-Nothing 调度需求。用户可以声明最小资源集合数(resource-collection-minimum),只有当已经完成调度资源数(assigned-resources)超过前面声明当前最小资源集合数才能触发节点绑定。 +同时提供 `Strict` 和 `NonStrict` 两个参数用于控制 resource-accumulation-process ,区别于其他社区方案将提供 two-level Gang 描述用于更好匹配真实场景。 + +## 动机 +在 AI 场景中很多任务都需要使用 Gang scheduling,社区已经有很多相关实现,比如 `Coscheduling` 、 `Vocalno`,设计过程中我们从社区项目中得到了很多灵感。 + +### 竞品对标 + +#### Coscheduling +1. `Coscheduling` 主要通过实现新型队列排序(queue-sort)接口以及其他方法将一组 Gang pod 尽量有序的出队。 + 举个🌰 ,我们有 10 个任务需要进行 Gang 调度,前面 5 个任务已经调度成功,此时第 6 个任务调度失败,`Coscheduling` 将会回滚前面 5 个已经完成调度的任务,同时会跳过后面 4 个待调度中的任务。 + +2. `Coscheduling` 会简单得使用一个全局间隔时间作为 Gang 调度周期。该设计会带来两个问题: + 1. 问题一,如果配置间隔太长会带来无效等待,如果太短会带来无效调度。 + 2. 问题二,如果待调度分组任务很多,此时大概率会出现周期内无法完成调度,出现调度超时的情况。 + + 对于上面的场景,我们的设计中称为 `Strict`,此场景下调度会严格按照既定配置的周期时间进行工作。 + +3. 有些任务需要复杂的 Gang 要求。例如,一个任务有几个规则,每个规则都有几个 pod 以及自身的 Gang 条件,任务也需要不同的规则来组成不同的 GangGroups。 +一个 GangGroup 中的所有 pod 只有在 GangGroup 中的所有规则都满足 Gang 条件后才触发绑定过程。上游标准的 `Coscheduling` 不能满足这个需求。 + +### 目标 +1. 定义 Gang 调度配置。 + +2. 提供调度器插件实现 Gang 调度。 + +### 非目标/未来工作 +1. 
提供使用 `NonStrict` 解决 Gang 资源死锁问题的能力。 + +## 方案 + +### 核心概念 + +#### Strict / NonStrict + +`Strict` 模式,如果其中一个 pod 调度失败,当前调度周期内,其他已经调度成功的 pod 将会被取消调度,同时正在调度中的 pod 将会在 PreFilter 阶段被拒绝调度。 + +`NonStrict` 模式,如果其中一个 pod 调度失败,并不会影响其他 pod 参与调度,会继续累计已经被调度的 pod 直到符合 Gang 调度条件。此模式对于 pod 比较多的情况比较友好,但是会增加不同 Gang 调度之间资源死锁的风险。 +> 举个🌰 ,如果当前资源配额为 10,此时用户提交三组 Gang 调度任务 pod 数都为 5,由于各种条件限制,Gang 调度 1/2/3 任务分别调度起来 pod 数量为 3/3/4, +> 此时当前资源组配额已经耗尽,不会有新的 pod 完成调度,三组 Gang 调度任务就会一直出于等待状态,这就是上面说到到资源死锁情况,目前还没有解决这个问题。 + +#### GangGroup + +`GangGroup`,有些任务需要复杂的 Gang 要求。例如,一个任务有几个规则,每个规则都有几个 pod 以及自身的 Gang 条件,任务也需要不同的规则来组成不同的 GangGroups。 +一个 GangGroup 中的所有 pod 只有在 GangGroup 中的所有规则都满足 Gang 条件后才触发绑定过程。`GangGroup` 则允许我们将不同 Gangs 进行聚合。 + +#### After Gang + +注意⚠️,如果满足 Gang 调度资源积累条件,随后一些 pod 在 binding 阶段失败,或者一些已经绑定的 pod 被抢占或者重新调度,这种情况下 Gang 的约束在资源重新分配过程中是否依然有效? + +答案:应该有效。因为 Gang 的设计初衷要求所有 pod 需要同时被拉起,如果只有其中一些 pod 被拉起,那么后续操作继续执行 Gang 调度策略将失去意义。因此,一旦 Gang 策略已经满足,后续所有的资源分配将不受 Gang 规则约束,后续将使用默认调度进行 pod 调度。 + +#### WaitTime + +`WaitTime` 自第一个 pod 进入 permit 阶段依赖的最大等待时间。如果 `WaitTime` 已经超时,调度器将会回滚所有已经调度完成的 pod,并且更新所有 pod annotation `gang.scheduling.koordinator.sh/timeout=true`,调度器将不会再调度这些 pod。用户需要注意这种情况并及时删除此类 pod。 + +### API +#### 定义 + +我们设计的初衷是优化以及增强社区原有的 `PodGroup` 能力,所以我们的 `PodGroup` 定义会兼容社区设计。我们会提供通过使用更新 annotation 方式使用 Gang 调度特性。 + +#### CRD 方式 +用户可以使用社区 `PodGroup` CRD 声明 Gang: +```go +type PodGroup struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + Spec PodGroupSpec `json:"spec,omitempty"` + Status PodGroupStatus `json:"status,omitempty"` +} +type PodGroupSpec struct { + MinMember int32 `json:"minMember,omitempty"` + MinResources *v1.ResourceList `json:"minResources,omitempty"` + + ScheduleTimeoutSeconds *int32 `json:"scheduleTimeoutSeconds,omitempty"` +} +``` +Pod 需要添加 label `pod-group.scheduling.sigs.k8s.io` 来关联 `PodGroup` 配置。 + +同时,我们也可以使用以下可选配置: +```yaml +gang.scheduling.koordinator.sh/total-number +gang.scheduling.koordinator.sh/mode +gang.scheduling.koordinator.sh/groups +``` +- `gang.scheduling.koordinator.sh/name` 配置 Gang 调度器名称, 名称需要符合 RFC 1123 规范。 + +- `gang.scheduling.koordinator.sh/total-number` 当前配置仅作用于 `Strict` 模式, 详情请参考 `Data-Structure` 部分。默认与 `gang.scheduling.koordinator.sh/min-available` 一致。 + +- `gang.scheduling.koordinator.sh/mode` 选项 `Strict` 或者 `NonStrict`。 默认配置为 `Strict`。 + +- `gang.scheduling.koordinator.sh/groups` 用于配置 GangGroups 名称。默认为空,表示不需要与其他资源合并到 GangGroups,同一个 GangGroups 的 Gangs 可以来自于不同的 namespace。 + +`PodGroup` annotation 可以包含 `gang.scheduling.koordinator.sh/total-number`, `gang.scheduling.koordinator.sh/mode`, `gang.scheduling.koordinator.sh/gang-groups`。 + +##### 示例 +基础 Gang 调度配置如下: +```yaml +apiVersion: v1alpha1 +kind: PodGroup +metadata: + creationTimestamp: "2022-07-11T18:26:33Z" + name: gang-a + namespace: default +spec: + minMember: 5 + minResources: + cpu: "5" + memory: "2048Mi" + scheduleTimeoutSeconds: 600 +``` + +创建一个任务包含两个策略:A 和 B,每个策略包含一些 pod。PodA 属于 roleA,PodB 属于 roleB。roleA、roleB 归属于同一个 GangGroup,示例如下: +```yaml +apiVersion: v1alpha1 +kind: PodGroup +metadata: + creationTimestamp: "2022-07-11T18:26:33Z" + name: gang-a + namespace: namespaceA + annotations: + gang.scheduling.koordinator.sh/total-number: 5 + gang.scheduling.koordinator.sh/mode: Strict + gang.scheduling.koordinator.sh/groups: ["namespaceA/gang-a", "namespaceB/gang-b"] +spec: + minMember: 5 + minResources: + cpu: "5" + memory: "2048Mi" + scheduleTimeoutSeconds: 600 +``` + +注意:如果用户使用 `CRD way`,需要集群管理员提前将 PodGroup 策略部署到集群,否则会出现带有 Gang 配置的 Pod 进行调度时,找不到对应的 Gang 策略 PodGroup 配置。 
+此外,从调度的角度来看,调度应该处理 Gang CRD 和 Pod 之间的任务顺序问题。 例如,如果 Pod 在 Gang CRD 之前到达调度,我们必须构建一个假 Gang 数据结构 +临时收集所有相关的 Pod,需要暂停 Pod 的调度,直到从真正的 Gang CRD 解析配置。 + +#### Annotation 方式 +```yaml +gang.scheduling.koordinator.sh/name +gang.scheduling.koordinator.sh/min-available +``` + +以上配置为必填,同时我们兼容社区 annotation `pod-group.scheduling.sigs.k8s.io`, `pod-group.scheduling.sigs.k8s.io/name`以及 `pod-group.scheduling.sigs.k8s.io/min-available` 。 + + +此外,我们还支持以下可选配置: +```yaml +gang.scheduling.koordinator.sh/waiting-time +gang.scheduling.koordinator.sh/total-number +gang.scheduling.koordinator.sh/mode +gang.scheduling.koordinator.sh/groups +``` + +- `gang.scheduling.koordinator.sh/waiting-time` 自第一个 pod 进入 permit 阶段依赖的最大等待时间。默认值可以在全局配置中设置。 + +- `gang.scheduling.koordinator.sh/total-number` 当前配置仅作用于 `Strict` 模式, 详情请参考 `Data-Structure` 部分。默认与 `gang.scheduling.koordinator.sh/min-available` 一致。 + +- `gang.scheduling.koordinator.sh/mode` 选项 `Strict` 或者 `NonStrict`。 默认配置为 `Strict`。 + +- `gang.scheduling.koordinator.sh/groups` 用于配置 GangGroups 名称。默认为空,表示不需要与其他资源合并到 GangGroups,同一个 GangGroups 的 Gangs 可以来自于不同的 namespace。 + +注意⚠️,如果同时通过 CRD 和 annotation 方式进行配置,该 annotation 配置将会覆盖 CRD 配置。同时, GangGroup 名称格式为 " gangNamespace" + "/" + "gangName " + +##### 示例 +基础 Gang 调度配置如下: +```yaml +metadata: + annotations: + gang.scheduling.koordinator.sh/name: gang-a + gang.scheduling.koordinator.sh/min-available: 5 +``` + +创建一个任务包含两个策略:A 和 B,每个策略包含一些 Pod。PodA 属于 roleA,PodB 属于 roleB。roleA、roleB 归属于同一个 GangGroup,示例如下: +```yaml +metadata: + annotations: + gang.scheduling.koordinator.sh/name: gang-a + gang.scheduling.koordinator.sh/waiting-time: 3600s + gang.scheduling.koordinator.sh/min-available: 5 + gang.scheduling.koordinator.sh/total-number: 5 + gang.scheduling.koordinator.sh/mode: Strict + gang.scheduling.koordinator.sh/groups: ["namespaceA/gang-a", "namespaceB/gang-b"] +metadata: + annotations: + gang.scheduling.koordinator.sh/name: gang-b + gang.scheduling.koordinator.sh/waiting-time: 3600s + gang.scheduling.koordinator.sh/min-available: 5 + gang.scheduling.koordinator.sh/total-number: 5 + gang.scheduling.koordinator.sh/mode: Strict + gang.scheduling.koordinator.sh/groups: ["namespaceA/gang-a", "namespaceB/gang-b"] +``` + +创建一个任务包含两个策略:A 和 B,每个策略包含一些 Pod。PodA 属于 roleA,PodB 属于 roleB。roleA、roleB 归属于不同 GangGroup,示例如下: +```yaml +metadata: + annotations: + gang.scheduling.koordinator.sh/name: gang-a + gang.scheduling.koordinator.sh/waiting-time: 3600s + gang.scheduling.koordinator.sh/min-available: 5 + gang.scheduling.koordinator.sh/total-number: 5 + gang.scheduling.koordinator.sh/mode: Strict + gang.scheduling.koordinator.sh/groups: "" +metadata: + annotations: + gang.scheduling.koordinator.sh/name: gang-b + gang.scheduling.koordinator.sh/waiting-time: 3600s + gang.scheduling.koordinator.sh/min-available: 5 + gang.scheduling.koordinator.sh/total-number: 5 + gang.scheduling.koordinator.sh/mode: Strict + gang.scheduling.koordinator.sh/groups: "" +``` + +### 详细设计 +#### QueueSortPlugin + +我们单独设计调度器插件用于实现 `QueueSort` 拓展点,这样就可以将队列排序逻辑集成到所有插件,并且只需要注册一次。 + +当前方案中,我们实现 Less 方法汇总属于相同 Gang 的 pod。具体排序规则为: + +1. 比较两个 pod 的优先级配置,优先级越高的 pod 优先入队。 +2. 比较两个 pod 的创建时间戳,如果 pod 归属于同一个 Gang 配置,我们比较 Gang 配置创建时间,谁先创建则优先入队。 +3. 
比较 pod 的 namespace,如果 pod 归属某一个 Gang 配置,则比较 Gang 名称。 + +```go +type QueueSortPlugin interface{ + QueueSort(*QueuedPodInfo, *QueuedPodInfo) bool +} +``` + +#### GangSchedulingPlugin +##### Data-Structure +###### Gang +```go +type Gang struct { + Name string + WaitTime time.Duration + Mode string //Strict or NonStrict + GangGroup []string + MinRequiredNumber int + TotalChildrenNum int + Children map[string]*PodInfo + BoundChildren map[string]*PodInfo + WaitingForBindChildren map[string]*PodInfo + ResourceSatisfied bool + ScheduleCycle int + ScheduleCycleValid bool + ChildrenScheduleRoundMap map[string]int +} +``` + +Gang,用于记录 Gang 调度状态到调度器缓存。 + +- `Children`,用于记录归属于当前 Gang 的 pod 列表。 +- `BoundChildren`,`WaitingForBindChildren` 用于记录已经出于 binding 状态的 pod,用于检查 pod 是否已经通过 permit 阶段。 +- `ResourceSatisfied`,用于标记当前 pod 是否通过调度 Permit 阶段,如果通过则为 true。该字段主要用于判断当前 Gang 调度是否满足条件。 +- `scheduleCycle`,`childrenScheduleRoundMap`,前面两个字段主要用于控制 Gang 调度周期。 +> 举个🌰 ,调度伊始 `scheduleCycle` 字段为 1,`childrenScheduleRoundMap` 中所有 pod 值为 0。 +> 所有 pod 进入 PreFilter 阶段时,将会判断 `childrenScheduleRoundMap` 中 pod 值是否小于 `scheduleCycle` 值; +> 如果上一步校验通过,则将 `childrenScheduleRoundMap` 值设置为 `scheduleCycle` 的值,并通过当前校验; +> 反之则说明当前 pod 在本轮调度周期内已经完成调度,需要拒绝本次调度。 +> 根据 `totalChildrenNum` 字段,当所有 pod 都通过 PreFilter 阶段,说明当前调度周期所有 pod 已经完成调度,`scheduleCycle` 需要累加 1,说明开启新一轮调度周期。 +- `scheduleCycleValid`,当前 Gang 中任意 pod 在 Filter 阶段失败,scheduleCycleValid 将设置为 true,只有所有 pod 全部通过 Filter 阶段,该字段才会设置为 true。 + `scheduleCycleValid=false` 此场景下所有 pod 将不会进行调度,同时所有调度中都 pod 将被在 PreFilter 阶段被拒绝,当新一轮调度周期开启时,`scheduleCycleValid` 才会被设置为 true。 + +注意⚠️ ,`scheduleCycle\scheduleCycleValid\childrenScheduleRoundMap` 仅作用于 `Strict` 模式。 + +##### GangPlugin + +在调度器框架 Plugin 结构提基础上,增加 gangCache 用于缓存 Gang 信息。 +```go +type GangPlugin struct { + frameworkHandler framework.Handle + gangClient gangClient.Interface + podLister listerv1.PodLister + snapshotSharedLister framework.SharedLister + gangCache map[string]*Gang +} +``` +当启动 kubernetes 调度器时,我们仅需要将我们当逻辑挂载到以下 4 个扩展点: +```go +var( + _ framework.PreFilterPlugin = &GangScheduling{} + _ framework.PostFilterPlugin = &GangScheduling{} + _ framework.PermitPlugin = &GangScheduling{} + _ framework.ReservePlugin = &Coscheduling{} +) +type GangScheduling interface{ + ActiveGang(pod *corev1.Pod, state *framework.CycleState) + PreFilter(context.Context, *corev1.Pod) error + PostFilter(ctx context.Context, state *CycleState, pod *v1.Pod, filteredNodeStatusMap NodeToStatusMap) (*PostFilterResult, *Status) + Permit(context.Context, *corev1.Pod) Status + Unreserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) +} +``` +###### **PreFilter** + +`NonStrict` 模式,我们仅处理 步骤一和二: + +- 校验 Gang 下包含所有 pod 是否符合最小数,如果不符合则拒绝当前 pod。 + +- 校验 Gang 是否超时,如果超时则拒绝当前 pod。 + +- 校验 Gang scheduleCycleValid 字段是否为 true,如果为 false 则拒绝当前 pod。 + +- 尝试更新 `scheduleCycle`, `scheduleCycleValid`, `childrenScheduleRoundMap` 字段。 + +###### **PostFilter** + +到达当前阶段说明 pod 没有通过 Filter 校验,操作如下: + +- 如果 `Strict` 模式,设置 `scheduleCycleValid` 字段为 false,同时释放所有已经完成调度的 pod。 + +- 如果 `NonStrict` 模式则不做任何操作。 + +###### **Permit** + +到达当前阶段说明 pod 已经通过 Filter 校验,调度器插件将会计算 GangGroup 下所有 Gang 已经完成调度 pod 数量是否满足 Gang 最小值。 + +- 如果 Gang 不符合 bind 条件,我们会将 pod 状态修改为 "Wait" 并配置超时时间,同时 bind 协程一直保持等待直到超时或者通过校验。 + 随后,我们会执行 `ActiveGang` 操作,该操作会将归属于 Gang 的 pod 从 `schedulableQueue` 或者 `backoffQueue` 队列中迁移到 `activeQueue` 队列, + 如此操作之后,pod 将会被尽快尽享调度。 + +> 注意⚠️ ,社区调度器中,调度周期最长不能超过 15 分钟,我们则需要通过改写 RunPermitPlugins 将调度周期配置超过 15 分钟。 + +- 如果 Gang 符合 bind 条件,我们将等待中 pod 状态修改为 "Success",此时 bind 
协程将结束等待并执行后续操作,并将 Gang 对象中 `ResourceSatisfied` 设置为 true。 + +###### **Un-reserve** + +如果 permit 阶段超时且 binding 阶段失败,此时调度阶段将会流转到 un-reserve 阶段,我们通过 Gang 对象中 `ResourceSatisfied` 值判断,如果此时值为 true 说明 binding 阶段失败,反之则说明 Gang 超时。 + +- 如果 permit 阶段超时,我们将在所有 Gang 下所有 pod annotation 中增加 `gang.scheduling.koordinator.sh/timeout=true`,同时释放所有已经调度成功的 pod。 + 此时,Gang 下所有 pod 将永远不会再进行调度,用户需要手动处理 permit 超时问题。 + +- 如果 binding 阶段失败,Gang 资源累计操作将会结束,随后会回滚所有失败的 pod 。 + +###### **Init** + +我们将 watch pod 事件,并根据事件类型持续更新 Gang。 + +## 未解问题 + +## 可选性 + +用户可以根据具体场景选择使用 Gang `Strict` 或者 `NonStrict` 模式。 diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/koordlet-overview.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/koordlet-overview.md new file mode 100644 index 000000000..0f334fa1e --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/koordlet-overview.md @@ -0,0 +1,42 @@ +# Koordlet + + +## 摘要 +Koordlet 是部署在 Kubernetes 节点中的 DaemonSet,用于混部资源超卖、干扰检测、QoS 保障等。它由几个模块组成,分别负责信息收集、数据分析和 QoS 管理。 +一些模块还提供了框架脚手架,提供了一组插件进行扩展(如"QoS Manager"),以便于添加新策略。 + + +## 架构 +![image](/img/koordlet-arch.svg) + +## 模块 + +### Metrics Advisor +Metric Advisor 提供节点、Pod 和容器的资源使用和性能特征的基本信息。 +它是一个独立的模块,定期收集、处理和导出资源画像。它还检测运行容器的干扰,例如 CPU 调度、内存分配延迟和压力阻塞信息(Pressure Stall Information, PSI)。 +该信息将广泛用于资源超卖和 QoS 保障插件。 + +### Storage +Storage 管理来自 Metrics Advisor 和 States Informer 的信息,提供一系列增删改查的API,并对过期数据定期清理。 +它有两种类型的数据:静态和时间序列。时间序列类型存储历史数据用于统计目的,例如 CPU 和内存使用情况。静态类型包括节点、Pod 和容器的状态信息,例如节点的 CPU 信息、Pod 的元数据。 + +### States Informer +States Informer 从 kube-apiserver 和 kubelet 同步节点和 Pod 状态,并将数据作为 `static` 类型保存到 Storage 中。与其他模块相比,该模块在开发迭代中应该保持相对稳定。 + +### QoS Manager +QoS Manager 协调一组插件,这些插件负责按优先级保障 SLO,减少 Pod 之间的干扰。插件根据资源分析、干扰检测以及 SLO 策略配置,在不同场景下动态调整资源参数配置。通常来说,每个插件都会在资源调参过程中生成对应的执行计划。 + +QoS Manager 可能是迭代频率最高的模块,扩展了新的插件,更新了策略算法并添加了策略执行方式。 +一个新的插件应该实现包含一系列标准API的接口,确保 QoS Manager 的核心部分简单且具有较好的可维护性。 +高级插件(例如用于干扰检测的插件)会随着时间的推移变得更加复杂,在孵化已经稳定在 QoS Manager 中之后,它可能会成为一个独立的模块。 + +### Metrics Reporter +Metrics Reporter 从 Storage 中读取历史指标和状态数据,然后将它们合并并发送到 ApiServer,这些数据将被 Koordinator Manager 用于资源超卖模型管理。 +Metrics Reporter 还支持针对不同混部场景的多种处理算法。 + +### Runtime Hooks +Runtime Hooks 充当运行时 Hook 管理器的后端服务器。 Runtime Hook 管理器是一个 CRI 代理,它拦截CRI请求,调用后端服务器注入策略,如通过 Pod 优先级设置资源隔离参数,应用资源分配策略。 +Runtime Hooks 提供了一个框架来维护不同类型的策略,并在容器的生命周期中提供灵活的扩展点。 + +#### 例如 Pod 生命周期中的 LLC 隔离注入 +![image](/img/llc-isolation.svg) diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/load-aware-scheduling.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/load-aware-scheduling.md new file mode 100644 index 000000000..a233834c6 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/load-aware-scheduling.md @@ -0,0 +1,114 @@ +# 负载感知调度 + +## 摘要 + +虽然 Koordiantor 通过超卖机制超卖资源可以提高节点的利用率,但也会因为 BestEffort 类型的工作负载干扰延迟敏感型应用,尤其是当节点负载水位较高时,这种干扰带来的影响会放大,不仅可能导致延迟敏感型应用的服务质量,也可能导致 BestEffort 类型的工作负载本身也不能很快的完成任务。 + +## 动机 + +Koordinator 通过超卖机制超卖一些资源。尽管它可以提高节点的利用率,但 BestEffort 工作负载也可能会干扰对延迟敏感的应用程序。 + +### 目标 + +1. 提供可配置的调度插件来帮助控制集群资源利用率。 +2. 资源利用率控制机制支持多种资源。 +3. 将资源利用率控制在安全阈值。 + +### 非目标/未来工作 + +1. 
通过应用画像帮助插件实现更合理的评估机制并获得更好的均衡效果。这是一项后续工作,将在不同的提案下完成。 + +## 用户故事 + +### 故事 1 + +当节点的资源利用率达到高阈值时,节点上正在运行的工作负载之间会发生严重的资源争用。例如,由于更高优先级的应用程序需要资源,因此经常 BestEffort 的工作负载。结果,BestEffort 的工作负载超时甚至被迫结束;或者对延迟敏感的应用程序将在高利用率下遭受严重的性能下降,无法满足外部 SLA。应该避免这种情况。 + +### 故事 2 + +混部集群中的工作负载具有不同的资源需求。典型的 CPU 密集型工作负载预计会使用更多 CPU,而其他类型的工作负载可能会使用更多内存。有可能 CPU 资源的利用率比较高,而内存资源的利用率比较低。在这种情况下,资源的不平衡利用会影响调度的效果,甚至可能导致资源空闲但 Pod 无法调度的问题。 + +### 故事 3 + +Koordinator 定义 NodeMetric CRD 来描述节点的资源使用情况,并由 Koordlet 定期更新。但是,如果在更新周期中有很多 Pod 调度到冷节点(即资源利用率低的节点),当这些 Pod 开始运行时,这些节点的资源利用率可能会超过预期的阈值。结果,这些 Pod 的运行时质量并没有预期的那么好。 +### 故事 4 + +由于节点异常,Koordlet 可能无法报告最新的资源使用情况。在调度过程中应避免此类节点,以防止出现意外异常。 + +## 实施细节 + +![image](/img/load-aware-scheduling-arch.svg) + +调度插件过滤异常节点并根据资源使用情况对其进行评分。这个调度插件扩展了 Kubernetes 调度框架中定义的 Filter/Score/Reserve/Unreserve 扩展点。 + +### 过滤不健康的节点 + +默认过滤异常节点,但是用户可以根据需要通过配置来决定是否开启。 + +- 过滤 Koordlet 无法更新 NodeMetric 的节点。如果配置启用,插件将排除 nodeMetrics.status.updateTime >= LoadAwareSchedulingArgs.nodeMetricExpirationSeconds 的节点。 + +- 按利用率阈值过滤节点。如果配置启用,插件将排除 latestUsageUtilization >= 利用率阈值的节点。 在过滤阶段,仅从最新的 NodeMetric 中获取资源利用率,已分配但尚未统计的 Pod 的资源利用率不参与计算,以便为新创建的 Pod 分配资源,避免因估算不合理而导致调度失败。 + +### 评分算法 + +评分算法的核心逻辑是选择资源使用量最小的节点。但是考虑到资源使用上报的延迟和 Pod 启动时间的延迟,时间窗口内已经调度的 Pod 和当前正在调度的 Pod 的资源请求也会被估算出来,并且估算值将参与计算。 + +### 插件配置 + +```go + +type LoadAwareSchedulingArgs struct { + metav1.TypeMeta + + FilterExpiredNodeMetrics *bool `json:"filterExpiredNodeMetrics,omitempty"` + NodeMetricExpirationSeconds *int64 `json:"nodeMetricExpirationSeconds,omitempty"` + ResourceWeights map[corev1.ResourceName]int64 `json:"resourceWeights,omitempty"` + UsageThresholds map[corev1.ResourceName]int64 `json:"usageThresholds,omitempty"` + EstimatedScalingFactors map[corev1.ResourceName]int64 `json:"estimatedScalingFactors,omitempty"` +} + +``` + +- `FilterExpiredNodeMetrics` 指定是否过滤 Koordlet 无法更新 NodeMetric 的节点。 +- `NodeMetricExpirationSeconds` 表示 NodeMetric 过期时间,单位为秒;当NodeMetric过期时,节点被认为异常。默认为180秒。 +- `ResourceWeights` 表示资源的权重。默认情况下,CPU 和内存的权重都为1。 +- `UsageThresholds` 表示资源利用率阈值,CPU 的默认值为65%,内存的默认值为95%。 +- `EstimatedScalingFactors` 表示估计资源使用情况时的系数。CPU 的默认值为85%,内存的默认值为70%。 + +`FilterExpiredNodeMetrics` 控制 Filter 行为,如果值为 `false`,`NodeMetricExpirationSeconds` 在计分时仍然可以使用。 + +### 自定义节点指标更新周期 + +此插件依赖于 NodeMetric 的报告周期。需要根据不同的场景和工作量设置不同的报告周期。如果报告周期比较长,Koordlet 需要在报告周期内进行汇总,以保证指标的效果。因此,NodeMetricSpec 需要扩展以支持用户自定义的报告周期和聚合周期。用户可以修改 `slo-controller-config` 来完成相应的配置,Koord-Manager 中的控制器会负责更新相关节点的 NodeMetrics 的上报周期和聚合周期字段。 + +```go +// NodeMetricSpec defines the desired state of NodeMetric +type NodeMetricSpec struct { + // CollectPolicy defines the Metric collection policy + CollectPolicy *NodeMetricCollectPolicy `json:"metricCollectPolicy,omitempty"` +} + +// NodeMetricCollectPolicy defines the Metric collection policy +type NodeMetricCollectPolicy struct { + // AggregateDurationSeconds represents the aggregation period in seconds + AggregateDurationSeconds *int64 `json:"aggregateDurationSeconds,omitempty"` + // ReportIntervalSeconds represents the report period in seconds + ReportIntervalSeconds *int64 `json:"reportIntervalSeconds,omitempty"` +} +``` + +### 自定义节点使用阈值 + +目前,节点的资源利用率阈值是根据经验配置的,以保证节点的运行质量。但也有一些方法可以评估节点上运行的工作负载,以达到更合适的资源利用率阈值。例如,在分时场景中,可以设置更高的阈值以允许调度在延迟敏感的应用程序的低谷期间运行更多的 BestEffort 工作负载。当对延迟敏感的应用程序的峰值出现时,降低阈值并驱逐一些 BestEffort 工作负载。此外,可以使用 3-sigma 来分析集群中的利用率水平,以获得更合适的阈值。 + +支持用户通过 Annotation 自定义节点资源利用率阈值。 + +```go +const ( + AnnotationCustomUsageThresholds = "scheduling.koordinator.sh/usage-thresholds" +) + +type CustomUsageThresholds struct { + UsageThresholds 
map[corev1.ResourceName]int64 `json:"usageThresholds,omitempty"` +} +``` \ No newline at end of file diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/multi-hierarchy-elastic-quota-management.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/multi-hierarchy-elastic-quota-management.md new file mode 100644 index 000000000..6c8cebc88 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/multi-hierarchy-elastic-quota-management.md @@ -0,0 +1,342 @@ +# Multi Hierarchy Elastic Quota Management + +## Summary +When several users or teams share a cluster, fairness of resource allocation is very important. This proposal provides +multi-hierarchy elastic quota management mechanism for the scheduler. +- It supports configuring quota groups in a tree structure, which is similar to the organizational structure of most companies. +- It supports the borrowing / returning of resources between different quota groups, for better resource utilization efficiency. +The busy quota groups can automatically temporarily borrow the resources from the idle quota groups, which can improve the +utilization of the cluster. At the same time, when the idle quota group turn into the busy quota group, it can also automatically +take back the "lent-to" resources. +- It considers the resource fairness between different quota groups. When the busy quota groups borrow the +resources from the idle quota groups, the resources can be allocated to the busy quota groups under some fair rules. + +## Motivation + +### Compared with competitors + +#### Resource Quotas +[Resource Quotas](https://kubernetes.io/docs/concepts/policy/resource-quotas/) provides the ability to restrain the upper +limit of resource usage in one quota group. The quota group resource usage aggregated based on the pod resource configurations. +Suppose there are still free resources in the cluster, but the resource usage of this quota group is close to the limit. +The quota group cannot flexibly borrow the idle resources from the cluster. The only possible way is to manually adjust the +limit of the quota group, but it is difficult to determine the timing and value of the adjustment when there are lots of +quota groups. + +#### Elastic Quota +[Elastic Quota](https://github.com/kubernetes-sigs/scheduler-plugins/blob/master/kep/9-capacity-scheduling/README.md#goals) +proposed concepts of "max" and "min". "Max" is the upper bound of the resource consumption of the consumers. "Min" is the minimum +resources that are guaranteed to ensure the functionality/performance of the consumers. This mechanism allows the workloads +from one quota group to "borrow" unused reserved "min" resources from other quota groups. The unused "min" of one quota group +can be used by other quota groups, under the condition that there is a mechanism to guarantee the "victim" quota group can +consume its "min" resource whenever it needs. + +If multiple quota groups need borrow unused reserved "min" resources from other quota groups at the same time, +the implementation strategy is FIFO, which means that one quota group may occupy all "borrowed-from "resources, +while other quota groups cannot borrow any resources at all from the cluster. + +Neither of the above support multi hierarchy quota management. + +### Goals +1. Define API to announce multi hierarchy quota configuration. + +2. Provides a scheduler plugin to achieve multi hierarchy quota management ability. 
+ +### Non-goals/Future work +Users have two ways to manage GPU quotas. One is to only declare the number of GPU cards in the quota group, but do not +care about the specific card type assigned. The other is to specify the quotas required by different card types. For example, +suppose user A\B both has 10 GPU quota, and cluster has two GPU types A100\V100. quotaA only declare 10 GPU quota, so in the +scheduling process, as long as the total number of GPU cards allocated to A is 10, no matter what the allocation ratio of +a100\v100 is, it will meet the expectation. QuotaB also declare 10 GPU quota, but has more details with V100 is 5 and A100 is 5, +so the maximum allocation of V100 is 5 and A100 is 5 in the scheduling will meet the expectation. + +We know that the GPU card type reflected by the label or annotation on the node, not in the resource dimension, so we can't +simply configure nvidia.com/gpu-v100, nvidia.com/gpu-a100 directly into the quota group's resource dimension. + +What's more complicated is that in a cluster, there will be multiple quota groups like A\B at the same time, +These two modes will conflict. Suppose that the cluster resource has 20 cards, including 10 cards for A100 and 10 cards for V100. +If the scheduler first assigns 10 cards to quota groupA with all V100, then quota group B's V100 resource has no way to be guaranteed, +which obviously does not meet expectations. Therefore, we need to solve the problem that if the above two modes coexist, +the quota mechanism can still work normally. + +The above problems will be solved in the next proposal. + +## Proposal + +### Key Concept\User Stories +1. Each quota group declares its own "min" and "max". The semantics of "min" is the quota group's guaranteed resources, +if quota group's "request" less than or equal to "min", the quota group can obtain equivalent resources to the "request". +The semantics of "max" is the quota group's upper limit of resources. We require "min" to be less than or equal to max. + +2. We define "request" as the sum pod's request in the quota group. When some quota groups "request" is less than "min", and some +quota groups "request" is more than "min", the unused resources of the former can be lent to (or you can choose not to share) the +latter. The latter should use these resources according to the fair rule. When the former needs to use the "lent-to" resources, +the latter should also return the "borrowed-from" resources according to the fair rule. + +3. We define the "runtime" as the current actual resource that can be used by the quota group. For a quota group whose "request" +is less than min, the value of "runtime" is equal to "request". That is to say "request" should be unconditionally satisfied +if the "request" is less than "min". For a quota group whose "request" is greater than "min", the value of "runtime" is between +"min" and "max", and the part exceeding "min" is based on its own "request", the "lent-to" resources, and the ability of +other quota groups to compete for "lent-to" resources. This will be described in detail below. + +4. Hierarchy is very important in a resource-shared cluster. Suppose that the cluster shared by multiple departments, and +each department has multiple teams. If each team is a quota group, we naturally hope that the relationship between departments +and teams is tree shaped. In this way, no matter how to add, delete or adjust quota groups within the department, it is an +internal matter of the department. 
The cluster administrator only needs to be responsible for the quota configuration at the +level of departments, and the quota group's configuration can delegate power to the department itself. Moreover, tree can +help us easily see the summary of resources from the perspective of departments when there are lots of teams in one department. + +Another advantage of tree structure is that we can control the scope of the "lent-to" resource. For example, a department only +wants to its quota groups can borrow resources from each other, while the resources of the department do not want to be lent +to other departments. This is very convenient for the tree structure. It should be pointed out that although two levels can +meet most scenarios (the more levels, the higher the maintenance complexity), we will support that the height of the quota-tree +is arbitrary. + +### Implementation Details + +#### Calculate RuntimeQuota + +We use an example to introduce how to calculate "runtime". Suppose the cluster total resource is 100, and has 4 quotas, +the configuration and "request" of each quota group described as below: + +![image](/img/runtimequota1.jpg) + +We first calculate the "min" part of "runtime". It should be like as below: + +![image](/img/runtimequota2.jpg) + +Then we find quota groupA can lent 5 quotas to B\C\D, and the cluster has 40 quotas to allocate, so the sum is 45 for B\C\D +to share. We introduce a new field to represent the allocation fairness, which is called "shared-weight". "shared-weight" determines +the ability of quota groups to compete for shared resources. That is to say, B/C/D will allocate resources in the cluster according +to its "shared-weight". + +For example, assuming that the weights of B\C\D are 60\50\80 + +- B can get 45 * 60 / (60 + 50 + 80) = 14 + +- C can get 45 * 50 / (60 + 50 + 80) = 12 + +- D can get 45 * 80 / (60 + 50 + 80) = 19 + +However, quota group B only need 5 more due to request is 20 and min is 15, and quota group C and D are still hungry, +so quota group B can share 14 - 5 = 9 to C and D. + +![image](/img/runtimequota3.jpg) + +quota group C and D can still share the remained quota of 9 by allocation proportion, which C get 9 * 50 / (50 + 80) = 3, +D get 9 * 80 / (50 + 80) = 6, and we get the runtime of each quota group finally. + +![image](/img/runtimequota4.jpg) + +The whole process can be summarized as follows: + +1. The quota divided into two categories, one is whose "request" is less than "min", we call it "lent-to-quotas". The other is +whose "request" is greater than "min", we call it "borrowed-quotas". + +2. Calculate the "runtime" of each quota group not exceed "min", so we can get how many resources can be lent to "borrowed-quotas". + +3. The "borrowed-quotas" share the resources by allocation proportion. + +4. If the new "runtime" is larger than "request", there will be new resources which can be lent to the rest "borrowed-quotas". + +It is very difficult to manage the weight of thousands of quota groups in a company. Therefore, we need to set a default value +for the "shared-weight". According to our experience in online operations, using max as the default "shared-weight" of the quota +group can satisfy most scenarios. In this way, "max" has both the meaning of resource ceiling and allocation proportion: the +larger the "max" is, the more resources it wants. For individual special scenarios, the resource administrator can adjust the weight. 
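+
+The redistribution described above can be sketched in a few lines of Go. This is only an illustrative model of the weight-proportional sharing, not the actual koord-scheduler code; the `quota` struct, the `shareByWeight` helper and the demands assumed for quota groups C and D are hypothetical.
+
+```go
+package main
+
+import "fmt"
+
+// quota is a borrower quota group competing for the shared pool.
+// Names and numbers below are illustrative, not the koord-scheduler types.
+type quota struct {
+	name    string
+	weight  float64 // shared-weight, defaults to "max" in this proposal
+	demand  float64 // request - min, i.e. how much it wants beyond its "min"
+	granted float64 // share granted so far
+}
+
+// shareByWeight distributes pool among the borrowers in proportion to their
+// weights, caps each borrower at its remaining demand, and re-shares any
+// surplus until the pool is used up or every borrower is satisfied.
+func shareByWeight(pool float64, borrowers []*quota) {
+	for pool > 1e-9 {
+		totalWeight := 0.0
+		for _, q := range borrowers {
+			if q.demand-q.granted > 1e-9 {
+				totalWeight += q.weight
+			}
+		}
+		if totalWeight == 0 {
+			return // every borrower is satisfied, the leftover stays idle
+		}
+		surplus := 0.0
+		for _, q := range borrowers {
+			remaining := q.demand - q.granted
+			if remaining <= 1e-9 {
+				continue
+			}
+			share := pool * q.weight / totalWeight
+			if share > remaining {
+				surplus += share - remaining // give back what exceeds the demand
+				share = remaining
+			}
+			q.granted += share
+		}
+		pool = surplus
+	}
+}
+
+func main() {
+	// 45 shareable resources (5 lent by A plus 40 unallocated), weights 60/50/80.
+	// B only wants 5 more (request 20, min 15); C and D stay hungry.
+	borrowers := []*quota{
+		{name: "B", weight: 60, demand: 5},
+		{name: "C", weight: 50, demand: 30},
+		{name: "D", weight: 80, demand: 30},
+	}
+	shareByWeight(45, borrowers)
+	for _, q := range borrowers {
+		fmt.Printf("%s gets %.1f beyond its min\n", q.name, q.granted)
+	}
+	// Prints roughly 5.0 / 15.4 / 24.6; the example above rounds to integers
+	// at each step and therefore quotes 5 / 15 / 25.
+}
+```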
+ +It must be pointed out that if the cluster resources suddenly decrease due to node failure, the sum of "min" may be +greater than the total resources of the cluster. If this case happens, we can't grantee "min" of each quota group actually. +So we will reduce the "min" of each quota group in a moderate proportion, which is to ensure that the sum of +"min" actually in effect is less than the total resources of the cluster. + +We need to introduce the concept of "sys-group". "sys-group" means that the "min" of this quota group is infinite, +and its request will never be bound by the quota. It is usually used for system level pods. When the scheduler starts, +the "sys-group" will be created by default not only in scheduler memory, but also try create the quota group crd. +Its "min" and "max" are INT_MAX. At the same time, its "min" will not be reduced in proportion to the above process. +The real available total resource of normal quota groups is the cluster total resource minus the "used" of the "sys-group". + +We also need to introduce the concept of "default-group". If the pod cannot find a matching quota group, it will be +matched to the "default-group". the "default-group" will be created by default not only in scheduler memory, but also try +create the quota group crd. Its "min" and "max" has default value, users can modify them on demand. + +#### Hierarchy +We can organize quota groups using quota-tree, each quota group has its own configuration. Currently, we only allow leaf +nodes to submit jobs. An example is as below: + +![image](/img/quotatree1.jpg) + +When we calculate the "request" of each quota group. We first count the requests of each parent group from the bottom up, +which is the accumulation of mathematical min(child group request, child group max). + +![image](/img/quotatree2.jpg) + +Then we calculate the "runtime" from top to bottom. The "runtime" of the parent quota group is the total resources of the +child quota groups. First we calculate parent quota group's "runtime". + +![image](/img/quotatree3.jpg) + +Then we calculate child quota group's "runtime". + +![image](/img/quotatree4.jpg) + +#### Min Guarantee and Preemption +Considering the following situations, suppose that the cluster has two quotas group A\B. At t0 time, only quota groupA has job +submission, it can borrow from quota group B's resource, and the "request" and "used" of quota group are both 100 as below: + +![image](/img/quotaguarantee1.jpg) + +At t1 time, quota groupB has job submission too, so the "runtime" of quota group A\B is both 50. However, if quota +groupA don't return resource back, quota groupB can't assign any resource cause node resource occupied by the quota groupA. + +![image](/img/quotaguarantee2.jpg) + +The solution is that we will monitor the relationship between "used" and "runtime" of each quota group in the background thread. +If quota group's "used" continues to be greater than "runtime", we will start the forced recycling mechanism to kill +several pods in the order of priority from low to high until the "used" is less than or equal to "runtime". If some pods +in the quota group do not want to be recycled, we require such pods can only use resource up to "min". By default, we +assume all pods can use resource beyond "min" if "runtime" larger than "min". + +We do not adopt the cross quota preemption method to solve the problem that when quota group "used" is less than "runtime" +(to preempt the quota group whose "used" is greater than the "runtime"). 
Due to each quota group has an accurate runtime, +we can accurately recycle the overused resources of each quota group. This is more direct than preemption. + +In addition, we do not think that cross quota preemption is worth recommending. In principle, the priorities of different +quota groups are not comparable, because they may come from different business lines. The high priority of this business line +is not more important than the low priority of other business lines. Only priorities within a quota group have comparative +significance. So we will not support cross quota preemption temporary. Moreover, in inner quota preemption, we will limit +existUsed - preempted + preempt smaller than runtime. + +It can be seen from the above, if "min" of the quota group is not equal to "max", the "runtime" part exceeding "min" may +recycled by the scheduler. + +#### Configuration Limit +We introduce several constraints to ensure that the quota mechanism works properly. + +1. Except for the first level quota group, we require that the sum of "min" of all sub quota groups should be less than or +equal to the "min" of parent group. The reason for excluding the first level quota group is that the cluster resources +cannot avoid jitter. If the cluster resource reduced, we don't want to hinder the update of the quota groups. + +2. The "max" of child quota group can be larger than the "max" of parent group. Consider the following scenario, there are +2 subtrees in the cluster, "dev-parent" and "production-parent". Each subtree has several "quota-groups". When "production" +is busy, we can limit the resource use of the "dev" by only decreasing the "max" of "dev-parent", instead of decreasing +the "max" of each sub quota group of "dev-parent". + +3. Parent group cannot run pod. We did receive a request to allow the parent group to submit jobs. The priority of the +parent group's self jobs is higher than that of all the sub-groups, which means that the parent group's self jobs can +preempt the "runtime" of the sub-group's jobs at any time. This is somewhat similar to the hierarchical relationship of +"Town City province". Due to complexity,we do not support this issue for now. + +4. The parent of node can only be parent group, not child group. + +5. A quota group can't be converted on the attribute of parent group\child group. + +6. We allow a node on the quota tree to freely change its parent node, as long as it does not break the existing detection rules. + +We will introduce a new "web-hook" to check the configuration limitation. + +#### Extension Point + +##### PreFilter +We will check if the (Pod.request + Quota.Used) is less than Quota.Runtime. If not, the scheduling cycle of Pod will fail. + +##### PostFilter +We will re-implement the method selectVictimsOnNode in defaultPreempt. The original selectVictimsOnNode method selects all +the pods with the lower priority than the preemptor’s priority as potential victims in a node. For now, we only allow +inner-quota-group preemption. + +##### Cache and Controller +1. We will watch the event of quota group and pod to calculate "runtime" of each quota group. +2. We will create a thread to update quota group crd to display "request\used\runtime" periodicity. +3. We will create a thread to monitor "used" and "runtime" of each quota group. 
If quota group's "used" continues to be +greater than "runtime", we will start the forced recycling mechanism to kill several pods in the order of priority from +low to high until the "used" is less than or equal to "runtime". + +### API + +#### Quota +We will reuse [Elastic Quota](https://github.com/kubernetes-sigs/scheduler-plugins/blob/master/kep/9-capacity-scheduling/README.md#goals) +'s crd to declare quota group. + +```go +type ElasticQuota struct { + metav1.TypeMeta + metav1.ObjectMeta + Spec ElasticQuotaSpec + Status ElasticQuotaStatus +} + +type ElasticQuotaSpec struct { + Min v1.ResourceList + Max v1.ResourceList +} + +type ElasticQuotaStatus struct { + Used v1.ResourceList +} +``` + +we will also add new annotation and labels to achieve our desired functionality. +```yaml +annotations: + quota.scheduling.koordinator.sh/runtime: {cpu:4, memory: 8Gi} + quota.scheduling.koordinator.sh/shared-weight: {cpu:4, memory: 8Gi} +labels: + quota.scheduling.koordinator.sh/is-parent: false + quota.scheduling.koordinator.sh/parent-quota-name: "parent" + quota.scheduling.koordinator.sh/allow-lent-resource: true +``` +- `quota.scheduling.koordinator.sh/runtime` is updated by the scheduler. It reflects the "runtime" of the quota group. +- `quota.scheduling.koordinator.sh/is-parent` is disposed by the user. It reflects the "child\parent" attribute of the quota group. Default is child. +- `quota.scheduling.koordinator.sh/parent-quota-name` is disposed by the user. It reflects the parent quota name. Default is root. +- `quota.scheduling.koordinator.sh/shared-weight` is disposed by the user. It reflects the ability to share the "lent to" resource. Default equals to "max". +- `quota.scheduling.koordinator.sh/allow-lent-resource` is disposed by the user. It reflects whether quota group allows lent unused "min" to others. + +Here is a example: +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: test + namespace: test + annotations: + quota.scheduling.koordinator.sh/runtime: {cpu:4, memory: 8Gi} + quota.scheduling.koordinator.sh/shared-weight: {cpu:4, memory: 8Gi} + labels: + quota.scheduling.koordinator.sh/is-parent: false + quota.scheduling.koordinator.sh/parent-quota-name: "parent" + quota.scheduling.koordinator.sh/allow-lent-resource: true +spec: + max: + cpu: 20 + memory: 40Gi + nvidia.com/gpu: 2 + min: + cpu: 10 + memory: 20Gi + nvidia.com/gpu: 1 +``` + +#### Pod +We introduce a new label on the pod to associate pod with quota group: +```yaml +labels: + quota.scheduling.koordinator.sh/quota-name: "test1" +``` + +if pod's don't have the label, we will follow [Elastic Quota](https://github.com/kubernetes-sigs/scheduler-plugins/blob/master/kep/9-capacity-scheduling/README.md#goals) +using namespace to associate pod with quota group. + +### Compatibility +We are fully compatible with [Elastic Quota](https://github.com/kubernetes-sigs/scheduler-plugins/blob/master/kep/9-capacity-scheduling/README.md#goals) 's interface. +If pod's don't have the "quota-name" label, we will use the namespace to associate pod with quota group. If the pod has +the "quota-name" label, we will use it to associate pod with quota group instead of namespace. If we can't find the +matched quota group, we force the pod to associate with the "default-group". + +## Unsolved Problems +Please see Non-goals/Future work. 
+ +## Alternatives + +## Implementation History + +## References diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/node-prediction.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/node-prediction.md new file mode 100644 index 000000000..9bda2cc8a --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/node-prediction.md @@ -0,0 +1,278 @@ +# Node Prediction + +## Summary + +The *node prediction* is proposed to both improve the node utilization and avoid overloading. By profiling the +tendency of the node metrics, we can estimate the peak usage and implement more efficient over-commitment policy. + +## Motivation + +Scheduling pods with setting appropriate resource requirements is truly hard to follow. Underestimating requests can +bring performance issues. However, overvaluing requests is likely to cause resource waste and low efficiency. One +common approach is using Vertical Pod Autoscaler (VPA) to autopilot the resource requirements for the pods of the same +workload. The VPA optimizes the resource requirements of the pod according to the pod metrics of the same workload. It +estimates the pod usage and specifies proper resource requirements. It works well when we want to optimize the resource +requirements of workloads. However, most VPA approaches try to abandon the time series attribute from the pod metrics +and generate a relatively static requests/limits that should guarantee to make no bad ignoring the timing. It leaves +the usage-to-limit gap, i.e. the gap between the recommended pod request with the real-time pod usage, and the +well-known pooling effect, i.e. the gap between the sum of the pod usages with the node usage. Inspired by +[Google's work](#references) in the EuroSys'21, we propose the node prediction in Koordinator to conquer these two +gaps. + +### Goals + +- Define the node prediction API. +- Propose an online history-based-optimized (HBO) prediction model. +- Clarify how the Mid-tier resources are calculated with the prediction. + +### Non-Goals/Future Work + +- Propose a time-series-forecasting-based or offline prediction model. + +## User Stories + +### Story 1 + +As a cluster administrator, there are many web service pods allocating almost node resources. Whereas, the node +utilization is low since most allocated resources are not actually used. To improve node utilization, I want to reclaim +the unused resources to submit some low-priority online-service pods and Flink jobs. However, I am concerned with the +risks of over-utilization bringing machine overload which may cause the performance degradation and hurt the pod QoS. + +### Story 2 + +As a Kubernetes developer, I want to support the long-term load balancing in the scheduler. Thus, I need the information +that which nodes should be idle for a long time. + +## Design + +### Design Principles + +- The node prediction is low-cost and can be implemented in the Koordlet. +- The node prediction is pluggable. Users can replace the default model to customize the prediction. + +### Architecture + +The node prediction is implemented mainly in the Koordlet and Koord-Manager. The architecture is as below: + +![image](/img/node-prediction.svg) + +- Koordlet: The agent runs on the node. It implements the metrics collection, metrics storage, and predict server. + - Metrics Advisor: It collects the cpu/memory usage of the node and running pods. It stores the collected metrics in the Metric Cache. 
+ - Metric Cache: It stores the node and pod metrics in a TSDB, which allows other modules to query the metrics later. + - Predict Server: With the node and pod metrics retrieved from the Metric Cache, it calculates and checkpoints the predicted result based on the prediction model. + - States Informer: It maintains the metadata of the node and the pods. It also reports the latest prediction periodically to the kube-apiserver. +- Koord-Manager: The controller runs on a master node. + - Configuration delivery: It maintains the prediction and colocation strategies and distributes the node strategy onto the NodeMetric. + - Resource Calculator: It fetches the node prediction result, and calculates the resource allocatable of the reclaimed resources (i.e. Mid-tier resource). +- Koord-Scheduler: It schedules the pod with different priority bands (e.g. Prod, Mid, Batch). It can enable load-aware scheduling to balance the over-committed nodes' utilization. + +#### Workflow + +In the koordlet, stages to update the node prediction are as follows: + +1. Histogram initialization: The predict server initializes a set of histograms for CPU and memory. For implementing `N-Sigma_v1`, it initializes decayed histograms only for the node and priority classes. While implementing `N-Sigma_v2`, it initializes histograms both for the node and every running pod. +2. Metrics collection: The metrics advisor collects the usage statistics of node and pods and stores them as metric points into the metric cache every CollectInterval (e.g. 1s). +3. Histogram updating: The predict server fetches the node metrics and pod metrics of latest HistogramUpdateInterval (e.g. 30s). Then it uses the aggregated result to update the decayed histograms. +4. Periodical reporting: The states informer fetches node metrics and the last histograms for the node and priority classes every ReportingInterval (e.g. 60s). Then it reports the complete NodeMetric status with last node prediction info to the kube-apiserver. +5. Fast reporting: The states informer fetches the last histograms every CheckPredictionInterval (e.g. 20s). It checks if the predicted result is too small or too larger than the last updated prediction exceeding the ResourceDiffThreshold (e.g. 5%), or the updated duration is longer than ForceUpdateInterval (e.g. 600s). If the check result is true, It updates the latest node prediction to the kube-apiserver. + +In the koord-manager, stages to update the Mid-tier resources allocatable are as follows: + +1. NodeMetric lifecycle management: The koord-manager list-watches the Node and the ConfigMap slo-controller-config, and maintains the lifecycle of the NodeMetric CR. Once the colocation strategy in the slo-controller-config updated, the koord-manager parses the config data and updates the node prediction policy and mid colocation policy into the NodeMetric.Spec. +2. Mid resource updating: The koord-manager list-watches the NodeMetric. Once the NodeMetric status is updated, the koord-manager gets the latest node metrics and node prediction, and calculates the Mid allocatable resources based on the Mid over-commitment formula. Finally, it updates the Mid allocatable resources into the Node status as the extended resources (`kubernetes.io/mid-cpu`, `kubernetes.io/mid-memory`). + +#### Scheduling Optimization + +The results of the node prediction on the NodeMetric, the Mid extended resources on the Node and the scheduling Pod +in the scheduler are updated in different time. 
It is inevitable to find that the scheduler schedules a pod with an +older version of the node prediction, which may cause the schedule result "lagged". + +To relief the lagged prediction, the koordlet and koord-manager try both updating earlier when the +prediction/NodeMetric differs from the previous result than a threshold and set a resource buffer which should +tolerant most of the result changes between synchronizations. + +For the worst case in which the prediction could be lagged too much (e.g. 1 hour), we can maintain a lower bound of +the real Mid allocatable resources inside the scheduler. This part is not planned in the first version of the Mid-tier +over-commitment. + +### API + +#### Node Prediction + +##### Predict Policy + +```go +// ColocationStrategy defines the colocation strategy in slo-controller-config ConfigMap. +type ColocationStrategy struct { + // ... + NodePredictPolicy *slov1alpha1.PredictPolicy `json:"nodePredictPolicy,omitempty"` +} + +type NodeMetricSpec struct { + // ... + PredictPolicy *PredictPolicy `json:"predictPolicy,omitempty"` +} + +// PredictPolicy defines the policy for the node prediction. +type PredictPolicy struct { + ResourceDiffThresholdPercent *int64 `json:"resourceDiffThresholdPercent,omitempty"` + ColdStartPeriodSeconds *int64 `json:"coldStartPeriodSeconds,omitempty"` +} +``` + +##### Predicted Result + +```go +type NodeMetricStatus struct { + // ... + // ProdReclaimableMetric is the estimated reclaimable resources for the Prod-type pods. + ProdReclaimableMetric *ReclaimableMetric `json:"prodReclaimableMetric,omitempty"` +} + +type ReclaimableMetric struct { + // Resource is the resource usage of the prediction. + Resource ResourceMap `json:"resource,omitempty"` +} +``` + +#### Mid Overcommitment + +##### Colocation Strategy + +```go +type ColocationStrategy struct { + // ... + // MidCPUThresholdPercent defines the maximum percentage of the Mid-tier cpu resource dividing the node allocatable. + // MidCPUAllocatable <= NodeCPUAllocatable * MidCPUThresholdPercent / 100. + MidCPUThresholdPercent *int64 `json:"midCPUThresholdPercent,omitempty" validate:"omitempty,min=0,max=100"` + // MidMemoryThresholdPercent defines the maximum percentage of the Mid-tier memory resource dividing the node allocatable. + // MidMemoryAllocatable <= NodeMemoryAllocatable * MidMemoryThresholdPercent / 100. + MidMemoryThresholdPercent *int64 `json:"midMemoryThresholdPercent,omitempty" validate:"omitempty,min=0,max=100"` +} +``` + +##### Extended Resources + +```yaml +apiVersion: v1 +kind: Node +metadata: + name: test-node +status: + allocatable: + cpu: '32' + memory: 129636240Ki + pods: '213' + kubernetes.io/mid-cpu: '16000' # allocatable cpu milli-cores for Mid-tier pods + kubernetes.io/mid-memory: 64818120Ki # allocatable memory bytes for Mid-tier pods + capacity: + cpu: '32' + memory: 129636240Ki + pods: '213' + kubernetes.io/mid-cpu: '16000' + kubernetes.io/mid-memory: 64818120Ki +``` + +### Theoretical Model + +#### Node Peak Prediction + +Before elaborating the peak prediction algorithm, let's formalize the node peak prediction problem. + +Let's denote the usage of a Pod `p` at the time `t` is `U(p, t)`. + +Then the usage of a Node `M` which schedules a set of Pods is `MU(Pods, t) = sum[p in Pods](U(p, t))`. + +> Note that the non-Pod usage of the node can be regarded as the usage of a special pod `S`. + +When we want to predict the node peak at the time `T`, we are calculating +`Peak(Pods, T) = max[t >= T](sum[p in Pods](U(p, t)))`. 
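+
+To make the notation concrete, here is a minimal Go sketch of this aggregation, using a hypothetical `usageSample` type and toy numbers rather than the koordlet's real metric cache. It only shows how `MU(Pods, t)` and an *observed* historical peak are computed; the future peak `Peak(Pods, T)` cannot be computed directly and has to be estimated by the prediction models described next.
+
+```go
+package main
+
+import "fmt"
+
+// usageSample holds U(p, t) for every pod at one sampling time.
+type usageSample map[string]float64 // pod name -> usage in CPU cores
+
+// nodeUsage computes MU(Pods, t) = sum over pods of U(p, t). The non-Pod
+// (system) usage is folded in as the "special pod S" mentioned above.
+func nodeUsage(pods usageSample, systemUsage float64) float64 {
+	total := systemUsage
+	for _, u := range pods {
+		total += u
+	}
+	return total
+}
+
+// observedPeak is the maximum node usage over historical samples. It is only
+// an empirical stand-in: Peak(Pods, T) ranges over *future* times, so it must
+// be estimated, e.g. with the N-sigma model below.
+func observedPeak(samples []usageSample, systemUsage []float64) float64 {
+	peak := 0.0
+	for i, s := range samples { // assumes len(samples) == len(systemUsage)
+		if mu := nodeUsage(s, systemUsage[i]); mu > peak {
+			peak = mu
+		}
+	}
+	return peak
+}
+
+func main() {
+	samples := []usageSample{
+		{"pod-a": 1.2, "pod-b": 0.8},
+		{"pod-a": 2.0, "pod-b": 0.5},
+	}
+	system := []float64{0.3, 0.4}
+	fmt.Printf("observed peak: %.1f cores\n", observedPeak(samples, system))
+}
+```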
+ +The predicted peak `Peak(Pods, T)` is our node prediction result at `T`. + +#### N-sigma Prediction + +There are several [statistical peak prediction models](#alternatives) which are practical to implement in the online +scheduler. [*N-sigma*](#references) is the picked peak prediction model in the current implementation. It assumes the +timing node metrics follow the Gaussian distribution, which allows us to estimate the node peak with the mean and +standard deviation (stdev): + +`Peak_N-Sigma_v1(Pods, T) = mean[T0 <= t <= T](MU(Pods, t)) + N * stdev[T0 <= t <= T](MU(Pods, t))` + +The `Peak_N-Sigma_v1` is the predicted node peak. It is implemented as the first version of node prediction, which is +calculated based on node-level metrics. + +Moreover, we can calculate with the pods' metrics: + +`Peak_Pods-N-Sigma'(Pods, T) = sum[p in Pods](mean[T0 <= t <= T](U(p, t)) + N * stdev[T0 <= t <= T](U(p, t)))` + +A more conservative is derived from their maximal. The `Peak_N-sigma_v2` is the second version of node prediction, +which also considers the pod-level metrics. + +`Peak_N-Sigma_v2(Pods, T) = max(Peak_N-Sigma_v1(Pods, T), Peak_Pods-N-Sigma(Pods, T))`. + +#### Mid-tier Overcommitment + +In the first version, the Mid-tier resource contains the reclaimable resources which are probably unused in the +long-term by the high-priority (i.e. Prod) pods. +The resource calculation for the Mid-tier resources can be described as follows: + +``` +Allocatable[Mid] := min(Reclaimable[Mid], NodeAllocatable * thresholdRatio) +``` + +- `Reclaimable[Mid] := max(0, reclaimRatio * Allocated[Prod] - Peak[Prod])`. The peak prediction model is used for estimating the future usage of the running Prod pods. The Mid pods can allocate a proportion of reclaimed resources from running Prod pods. +- `NodeAllocatable * thresholdRatio` is the maximal co-located Mid-tier resource setting from a ratio of the node allocatable. + +In next versions, the Mid-tier resource is planned to mix with the default node allocatable (i.e. the Prod allocatable), +which means a Mid pod can allocate the unallocated node allocatable resource, and an idle node is able to schedule Mid +pods. The Prod pods can preempt the Mid pods when the mixed allocatable is exhausted by the Mid pods, so that the +Prod-tier resource is still more stable and guaranteed than the Mid-tier. +Then the resource calculation for the mixed Mid-tier resources can be described as follows: + +``` +Allocatable[Mid]' := min(Reclaimable[Mid], NodeAllocatable * thresholdRatio) + Unallocated[Mid] +Unallocated[Mid] = max(NodeAllocatable - Allocated[Prod], 0) +``` + +## Alternatives + +### Peak Prediction Models + +There are several different peak prediction and time series forecasting models which can estimate the future peak +based on the historical node metrics, including statistical methods and machine learning methods. In this proposal, +statistical peak prediction models are preferred since they are practical to implement in the online scheduling system, +have less overhead of metrics collection than the ML approaches, and more simple to analyze and debug. + +Here are some common statistical peak prediction models: + +1. [Borg-default](#references) + +Borg-default simply over-commits the machine resources in a fixed rate `a`, which means the peak usage is regarded as +the result of the requests dividing `a`. + +Let's denote the resource request of the Pod `p` at the time `t` is `R(p, t)`, where `R(p, t) = 0` when `p` is not +running. 
Then we have, + +`Peak_Borg-default(Pods, T) = 1/a * sum[p in Pods](R(p, T))`, `a = 1.1` by default. + +2. [Resource Central](#references) + +Resource Central considers the peak of the machine as the sum of the peak of individual pods (or VMs). And a simple +peak prediction of a pod is the percentile of the historical usages, e.g. `percentile[t in [T-C, T]](U(p, t))`. + +`Peak_ResourceCentral(Pods, T) = sum[p in Pods](percentile[t in [T-C, T]](U(p, t)))` + +3. [Max](#references) + +The Max prediction model does not use the historical metrics directly, but takes the maximal of any known peak results. +It gets the more conservative result than the input models. For example, we have a `Max_Borg-default_ResourceCentral` +model calculated from the Borg-default and Resource Central models: + +`Peak_Max_Borg-default_ResourceCentral(Pods, T) = max(Peak_Borg-default(Pods, T), Peak_ResourceCentral(Pods, T))` + +## References + +1. Vertical Pod Autoscaler: https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler +2. Bashir, Noman, et al. "Take it to the limit: peak prediction-driven resource overcommitment in datacenters." Proceedings of the Sixteenth European Conference on Computer Systems. 2021. +3. Cortez, Eli, et al. "Resource central: Understanding and predicting workloads for improved resource management in large cloud platforms." Proceedings of the 26th Symposium on Operating Systems Principles. 2017. diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/nri-mode-resource-management.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/nri-mode-resource-management.md new file mode 100644 index 000000000..f7e45e5ec --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/nri-mode-resource-management.md @@ -0,0 +1,152 @@ +# NRI Mode Resource Management + +## Glossary + +NRI, node resource interface. See: https://github.com/containerd/nri + +## Summary + +We hope to enable NRI mode resource management for koordinator for easy deployment and in-time control. + +## Motivation + +Koordinator as a QoS-based scheduling for efficient orchestration of microservices, AI, and big data workloads on Kubernetes and its runtime hooks support two working [modes](https://github.com/koordinator-sh/koordinator/blob/main/docs/design-archive/koordlet-runtime-hooks.md) for different scenarios: `Standalone` and `Proxy`. However, both of them have some [constraints](https://shimo.im/docs/m4kMLdgO1LIma9qD). NRI (Node Resource Interface), which is a public interface for controlling node resources is a general framework for CRI-compatible container runtime plug-in extensions. It provides a mechanism for extensions to track the state of pod/containers and make limited modifications to their configuration. We'd like to integrate NRI framework to address `Standalone` and `Proxy` constraints based on this community recommend mechanism. + +### Goals + +- Support NRI mode resource management for koordinator. +- Support containerd container runtime. + +### Non-Goals/Future Work + +- Support docker runtime + +## Proposal + +Different from standalone and proxy mode, Koodlet will start an NRI plugin to subscribe pod/container lifecycle events from container runtime (e.g. containerd, crio), and then koordlet NRI plugin will call runtime hooks to adjust pod resources or OCI spec. The flow should be: + +- Get pod/container lifecycle events and OCI format information from container runtime (e.g. containerd, crio). 
+- Transform the OCI format information into internal protocols. (e.g. PodContext, ContainerContext) to re-use existing runtime hook plugins. +- Transform the runtime hook plugins' response into OCI spec format +- Return OCI spec format response to container runtime(e.g. containerd, crio). + +![nri-proposal.png](/img/nri-proposal.png) + +### User Stories + +#### Story 1 +As a cluster administrator, I want to apply QoS policy before pod's status become running. + +#### Story 2 +As a cluster administrator, I want to deploy koordinator cluster without restart. + +#### Story 3 +As a cluster administrator, I want to adjust resources' policies at runtime. + +#### Story 4 +As a GPU user, I want to inject environment before pod running. + +### Requirements + +- Need to upgrade containerd to >= 1.7.0, crio to >= v1.25.0 + +#### Functional Requirements + +NRI mode should support all existing functionalities supported by standalone and Proxy mode. + +#### Non-Functional Requirements + +Non-functional requirements are user expectations of the solution. Include +considerations for performance, reliability and security. + +### Implementation Details/Notes/Constraints +1. koordlet [NRI plugin](https://github.com/containerd/nri/blob/main/plugins/template/plugin.go) +```go +type nriServer struct { + stub stub.Stub + mask stub.EventMask + options Options // server options +} + +// Enable 3 hooks (RunPodSandbox, CreateContainer, UpdateContainer) in NRI +func (p *nriServer) Configure(config, runtime, version string) (stub.EventMask, error) { +} + +// Sync all pods/containers information before koordlet nri plugin run +func (p *nriServer) Synchronize(pods []*api.PodSandbox, containers []*api.Container) ([]*api.ContainerUpdate, error) { +} + +func (p *nriServer) RunPodSandbox(pod *api.PodSandbox) error { + podCtx.FromNri(pod) + RunHooks(...) + podCtx.NriDone() +} + +func (p *nriServer) CreateContainer(pod *api.PodSandbox, container *api.Container) (*api.ContainerAdjustment, []*api.ContainerUpdate, error) { + containerCtx.FromNri(pod, container) + RunHooks(...) + containCtx.NriDone() +} + +func (p *nriServer) UpdateContainer(pod *api.PodSandbox, container *api.Container) ([]*api.ContainerUpdate, error) { + containerCtx.FromNri(pod, container) + RunHooks(...) + containCtx.NriDone() +} +``` +2. koordlet enhancement for NRI +- PodContext +```go +// fill PodContext from OCI spec +func (p *PodContext) FromNri(pod *api.PodSandbox) { +} + +// apply QoS resource policies for pod +func (p *PodContext) NriDone() { +} +``` +- ContainerContext +```go +// fill ContainerContext from OCI spec +func (c *ContainerContext) FromNri(pod *api.PodSandbox, container *api.Container) { +} + +// apply QoS resource policies for container +func (c *ContainerContext) NriDone() (*api.ContainerAdjustment, []*api.ContainerUpdate, error) { +} +``` + +### Risks and Mitigations + +## Alternatives +There are several approaches to extending the Kubernetes CRI (Container Runtime Interface) to manage container resources such as `standalone` and `proxy`. Under `standalone` running mode, resource isolation parameters will be injected asynchronously. Under `proxy` running mode, proxy can hijack CRI requests from kubelet for pods and then apply resource policies in time. However, `proxy` mode needs to configure and restart kubelet. + +There are a little difference in execution timing between `NRI` and `proxy` modes. Hook points (execution timing) are not exactly same. The biggest difference is `proxy` call koordlet hooks between kubelet and containerd. 
However, NRI will call NRI plugin (koodlet hooks) in containerd, that means containerd still could do something before or after containerd call NRI plugin (koordlet hooks). For example, under `NRI` running mode, containerd setup pod network first and then call NRI plugin (koordlet hooks) in RunPodSanbox, but under `proxy` running mode, containerd couldn't do anything before koordlet hooks running when `proxy` handle RunPodSandbox CRI request. + +- Standalone + + - kubelet -- CRI Request -> CRI Runtime -- OCI Spec -> OCI compatible runtime -> containers + - kubelet -> Node Agent -> CRI Runtime / containers + +![standalone.png](/img/standalone.png) + +- Proxy + + - kubelet -- CRI Request -> CRI Proxy -- CRI Request (hooked) -> CRI Runtime -- OCI Spec -> OCI compatible runtime -> containers + +![proxy.png](/img/proxy.png) + +- NRI + + - kubelet -- CRI Request -> CRI Runtime -- OCI Spec --> OCI compatible runtime -> containers +                  ↘   ↗ +                Koordlet NRI plugin + +![nri.png](/img/nri.png) + +## Upgrade Strategy + +- Need to upgrade containerd to 1.7.0+ or CRIO to 1.26.0+ +- Need to enable NRI + + diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/pod-migration-job.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/pod-migration-job.md new file mode 100644 index 000000000..47a94aba8 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/pod-migration-job.md @@ -0,0 +1,374 @@ +# PodMigrationJob + +## Summary + +This proposal defines a CRD-based Pod migration API, through which the descheduler or other automatic fault recovery components can evict or delete Pods more safely. At the same time, the proposal also describes the specific implementation details of the API. + +## Motivation + +Migrating Pods is an important capability that many components (such as deschedulers) rely on, and can be used to optimize scheduling or help resolve workload runtime quality issues. We believe that pod migration is a complex process, involving steps such as auditing, resource allocation, and application startup, and is mixed with application upgrading, scaling scenarios, and resource operation and maintenance operations by cluster administrators. Therefore, how to manage the stability risk of this process to ensure that the application does not fail due to the migration of Pods is a very critical issue that must be resolved. + +Therefore, it is necessary to realize a final state-oriented migration capability based on CRD, track the status of each process in the migration, and perceive scenarios such as upgrading and scaling of the application. + +### Goals + +1. Defines a CRD-based Pod Migration Job API, through which the descheduler can evict or delete Pods more safely. +2. Describe in detail the design details behind the API. + +### Non-Goals/Future Work + +1. A new descheduler framework +2. Descheduling capability for different scenarios such as load-aware descheduling, defragemention, etc. +3. The details about Deterministic preemption that preempts other Pods for Reservation. + +## Proposal + +### User Stories + +#### Story 1 + +The descheduler in the K8s community evicts pods to be rescheduled according to different strategies. However, it does not guarantee whether the evicted Pod has resources available after re-creation. If a large number of new Pods are in the Pending state when the resources in the cluster are tight, may lower the application availabilities. 
+ +#### Story 2 + +The descheduler evicts the Pod through the Eviction API, and the Eviction API decides whether to delete the Pod according to the PDB status. However, it is unable to perceive workload upgrades, scaling and other scenarios in which Pods are deleted, which will also bring security risks. + +#### Story 3 + +The Pod migration capability itself can be provided to users as a service. Users can integrate this API in their own systems to achieve safe migration, and are no longer limited to deschedulers. + + +### Basic Migration API + +These APIs provide cluster administrators with more fine-grained migration control capabilities, which can better reduce risks. + +- `scheduling.koordinator.sh/eviction-cost` indicates the eviction cost. It can be used to set to an int32. The implicit eviction cost for pods that don't set the annotation is 0, negative values are permitted. If set the cost ith `math.MaxInt32`, it means the Pod will not be evicted. Pods with lower eviction cost are preferred to be evicted before pods with higher eviction cost. If a batch of Pods to be evicted have the same priority, they will be sorted by cost, and the Pod with the smallest cost will be evicted. Although the K8s community has [Pod Deletion Cost #2255](https://github.com/kubernetes/enhancements/issues/2255), it is not a general mechanism. To avoid conflicts with components that use `Pod Deletion Cost`, users can individually mark the eviction cost for Pods. + + +### Pod Migration Job CRD + +In order to support the above user stories, a Custom Resource Definition(CRD) named `PodMigrationJob` is proposed to ensure the migration process safely. + +#### Migration Job Spec + +```go + +// PodMigrationJob is the Schema for the PodMigrationJob API +// +k8s:openapi-gen=true +// +kubebuilder:resource:scope=Cluster +type PodMigrationJob struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + Spec PodMigrationJobSpec `json:"spec,omitempty"` + Status PodMigrationJobStatus `json:"status,omitempty"` +} + +type PodMigrationJobSpec struct { + // Paused indicates whether the PodMigrationJob should to work or not. + // Default is false + // +optional + Paused bool `json:"paused,omitempty"` + + // TTL controls the PodMigrationJob timeout duration. + // +optional + TTL *metav1.Duration `json:"ttl,omitempty"` + + // Mode represents the operating mode of the Job + // Default is PodMigrationJobModeReservationFirst + // +optional + Mode PodMigrationJobMode `json:"mode,omitempty"` + + // PodRef represents the Pod that be migrated + // +required + PodRef *corev1.ObjectReference `json:"podRef"` + + // ReservationOptions defines the Reservation options for migrated Pod + // +optional + ReservationOptions *PodMigrateReservationOptions `json:"reservationOptions,omitempty"` + + // DeleteOptions defines the deleting options for the migrated Pod and preempted Pods + // +optional + DeleteOptions *metav1.DeleteOptions `json:"deleteOptions,omitempty"` +} + +type PodMigrationJobMode string + +const ( + PodMigrationJobModeReservationFirst PodMigrationJobMode = "ReservationFirst" + PodMigrationJobModeEvictionDirectly PodMigrationJobMode = "EvictDirectly" +) + +type PodMigrateReservationOptions struct { + // ReservationRef if specified, PodMigrationJob will check if the status of Reservation is available. 
+ // ReservationRef if not specified, PodMigrationJob controller will create Reservation by Template, + // and update the ReservationRef to reference the Reservation + // +optional + ReservationRef *corev1.ObjectReference `json:"reservationRef,omitempty"` + + // Template is the object that describes the Reservation that will be created if not specified ReservationRef + // +optional + Template *ReservationTemplateSpec `json:"template,omitempty"` + + // PreemptionOption decides whether to preempt other Pods. + // The preemption is safe and reserves resources for preempted Pods. + // +optional + PreemptionOptions *PodMigrationJobPreemptionOptions `json:"preemptionOptions,omitempty"` +} + +type PodMigrationJobPreemptionOptions struct { + // Reserved object. +} +``` + +- `Paused` indicates whether the PodMigrationJob should to work or not. In some scenarios, the user does not expect the PodMigrationJob Controller to process the PodMigrationJob immediately, but rather to decide whether to execute it after completing some operations similar to auditing. +- `TimeoutInSeconds` controls the PodMigrationJob timeout duration. +- The `PodMigrationJob` support two modes defined by the field `Mode`: + - `PodMigrationJobModeReservationFirst` means that before migrating a Pod, try to reserve resources through the `Reservation` API, delete the Pod to be migrated after successfully reserved, and observe the status of the `Reservation` to ensure that the `Reservation` is consumed. + - `PodMigrationJobModeEvictionDirectly` indicates that the user clearly knows the risk of evicting the Pod and decides to evict the Pod directly. + - If `Mode` is not specified, `PodMigrationJobModeReservationFirst` is used by default +- `PodRef` represents the Pod that be migrated. The field is required. +- `ReservationOptions` defines options for how to reserve resource through `Reservation` API: + - `ReservationRef` if is specified, the referenced `Reservation` instance is used first. In some scenarios, such as defragmentation, in order to ensure the reliability of the upper-layer logic, resources may have been reserved on the target node. In this case, the specified `Reservation` can be used directly. + - `Template` describes the spec of `Reservation`. It is often not necessary to set this field. When neither `ReservationRef` nor `Template` is specified, the `PodMigrationJob controller` will construct the `ReservationSpec` reserved resources according to the Spec of the migrated Pod. If `Template` is set, the `ReservationTemplateSpec` and the Spec of the migrated Pod will be merged to construct the `ReservationSpec` reserved resources. + - `PreemptionOptions` decides whether to preempt other Pods if reserved resources failed. The specific details of preemption will be submitted in a separate proposal description in future work, and will not be expanded here for the time being. +- `DeleteOptions` defines the options of delete operation. Whether to delete a Pod through the `K8s Delete API` or evict a Pod through the `K8s Eviction API` depends on how the user configures the parameters of the `PodMigrationJob Controller`. Users only need to set `DeleteOptions` according to the workload in their own cluster. + +#### Migration Job Status + +```go +type PodMigrationJobStatus struct { + // PodMigrationJobPhase represents the phase of a PodMigrationJob is a simple, high-level summary of where the PodMigrationJob is in its lifecycle. + // e.g. 
Pending/Running/Failed + Phase PodMigrationJobPhase `json:"phase,omitempty"` + // Status represents the current status of PodMigrationJob + // e.g. ReservationCreated + Status string `json:"status,omitempty"` + // Reason represents a brief CamelCase message indicating details about why the PodMigrationJob is in this state. + Reason string `json:"reason,omitempty"` + // Message represents a human-readable message indicating details about why the PodMigrationJob is in this state. + Message string `json:"message,omitempty"` + // Conditions records the stats of PodMigrationJob + Conditions []PodMigrationJobCondition `json:"conditions,omitempty"` + // NodeName represents the node's name of migrated Pod + NodeName string `json:"nodeName,omitempty"` + // PodRef represents the newly created Pod after being migrated + PodRef *corev1.ObjectReference `json:"podRef,omitempty"` + // PreemptedPodsRef represents the Pods that be preempted + PreemptedPodsRef []corev1.ObjectReference `json:"preemptedPodsRef,omitempty"` + // PreemptedPodsReservations records information about Reservations created due to preemption + PreemptedPodsReservations []PodMigrationJobPreemptedReservation `json:"preemptedPodsReservation,omitempty"` +} + +type PodMigrationJobPreemptedReservation struct { + // Namespace represents the namespace of Reservation + Namespace string `json:"namespace,omitempty"` + // Name represents the name of Reservation + Name string `json:"name,omitempty"` + // NodeName represents the assigned node for Reservation by scheduler + NodeName string `json:"nodeName,omitempty"` + // Phase represents the Phase of Reservation + Phase string `json:"phase,omitempty"` + // PreemptedPodRef represents the Pod that be preempted + PreemptedPodRef *corev1.ObjectReference `json:"preemptedPodRef,omitempty"` + // PodsRef represents the newly created Pods after being preempted + PodsRef []corev1.ObjectReference `json:"podsRef,omitempty"` +} + +type PodMigrationJobCondition struct { + // Type is the type of the condition. + Type PodMigrationJobConditionType `json:"type"` + // Status is the status of the condition. + // Can be True, False, Unknown. + Status PodMigrationJobConditionStatus `json:"status"` + // Last time we probed the condition. + // +nullable + LastProbeTime metav1.Time `json:"lastProbeTime,omitempty"` + // Last time the condition transitioned from one status to another. + // +nullable + LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty"` + // Unique, one-word, CamelCase reason for the condition's last transition. + Reason string `json:"reason,omitempty"` + // Human-readable message indicating details about last transition. + Message string `json:"message,omitempty"` +} + +type PodMigrationJobPhase string + +const ( + // PodMigrationJobPending represents the initial status + PodMigrationJobPending PodMigrationJobPhase = "Pending" + // PodMigrationJobRunning represents the PodMigrationJob is being processed + PodMigrationJobRunning PodMigrationJobPhase = "Running" + // PodMigrationJobSucceed represents the PodMigrationJob processed successfully + PodMigrationJobSucceed PodMigrationJobPhase = "Succeed" + // PodMigrationJobFailed represents the PodMigrationJob process failed caused by Timeout, Reservation failed, etc. + PodMigrationJobFailed PodMigrationJobPhase = "Failed" + // PodMigrationJobAborted represents the user forcefully aborted the PodMigrationJob. + PodMigrationJobAborted PodMigrationJobPhase = "Aborted" +) + +// These are valid conditions of PodMigrationJob. 
+const ( + PodMigrationJobConditionReservationCreated PodMigrationJobConditionType = "ReservationCreated" + PodMigrationJobConditionReservationScheduled PodMigrationJobConditionType = "ReservationScheduled" + PodMigrationJobConditionPreemption PodMigrationJobConditionType = "Preemption" + PodMigrationJobConditionEviction PodMigrationJobConditionType = "Eviction" + PodMigrationJobConditionPodScheduled PodMigrationJobConditionType = "PodScheduled" + PodMigrationJobConditionReservationPodBoundReservation PodMigrationJobConditionType = "PodBoundReservation" + PodMigrationJobConditionReservationBound PodMigrationJobConditionType = "ReservationBound" +) + +// These are valid reasons of PodMigrationJob. +const ( + PodMigrationJobReasonTimeout = "Timeout" + PodMigrationJobReasonFailedCreateReservation = "FailedCreateReservation" + PodMigrationJobReasonUnschedulable = "Unschedulable" + PodMigrationJobReasonMissingPod = "MissingPod" + PodMigrationJobReasonMissingReservation = "MissingReservation" + PodMigrationJobReasonPreempting = "Preempting" + PodMigrationJobReasonPreemptComplete = "PreemptComplete" + PodMigrationJobReasonEvicting = "Evicting" + PodMigrationJobReasonFailedEvict = "FailedEvict" + PodMigrationJobReasonEvictComplete = "EvictComplete" + PodMigrationJobReasonWaitForPodBindReservation = "WaitForPodBindReservation" +) + +type PodMigrationJobConditionStatus string + +const ( + PodMigrationJobConditionStatusTrue PodMigrationJobConditionStatus = "True" + PodMigrationJobConditionStatusFalse PodMigrationJobConditionStatus = "False" + PodMigrationJobConditionStatusUnknown PodMigrationJobConditionStatus = "Unknown" +) +``` + +### Implementation Details/Notes/Constraints + +#### PodMigrationJob Controller + +The difference between `PodMigrationJobController` and general controller is that `PodMigrationJobController` will evaluate all pending PodMigrationJobs together (ie PodMigrationJob.Phase is Pending) and select a batch of PodMigrationJob and reconcile them. This selection process is called the arbitration mechanism. The reason why the arbitration mechanism is introduced is mainly to control the stability risk and control the cost of migrating Pods. The arbitration mechanism includes three stages: `Group`, `Filter` and `Sort`. + +##### Group PodMigrationJob + +Aggregate according to different workloads to facilitate the processing of subsequent processes + +- Aggregate PodMigrationJob by workload +- Aggregate PodMigrationJob by Node +- Aggregate PodMigrationJob by Namespace + +##### Filter PodMigrationJob + +- Check how many PodMigrationJob of each workload are in the Running state, and record them as ***migratingReplicas***. If the ***migratingReplicas*** reach a certain threshold, they will be excluded. The detailed algorithm of this threshold is described later. +- Check the number of ***unavailableReplicas*** of each workload, and determine whether the ***unavailableReplicas + migratingReplicas*** conform to the corresponding [PDB(Pod Disruption Budget)](https://kubernetes.io/docs/tasks/run-application/configure-pdb/) or [PUB(Pod Unavailable Budget)](https://openkruise.io/docs/user-manuals/podunavailablebudget). If there is no PDB or PUB, use the algorithm to calculate dynamically. If not, exclude the corresponding PodMigrationJob. +- Check the number of Pods being migrated on the node where each target Pod is located. If it exceeds the maximum migration amount for a single node, exclude it. +- Check the number of Pods being migrated in the Namespace where each target Pod is located. 
If it exceeds the maximum migration amount for a single Namespace, exclude it + +The detailed algorithm of Workload Max Migrating/Unavailable Replicas: + +```go +func GetMaxMigrating(replicas int, intOrPercent *intstr.IntOrString) (int, error) { + return GetMaxUnavailable(replicas, intOrPercent) +} + +func GetMaxUnavailable(replicas int, intOrPercent *intstr.IntOrString) (int, error) { + if intOrPercent == nil { + if replicas > 10 { + s := intstr.FromString("10%") + intOrPercent = &s + } else if replicas >= 4 && replicas <= 10 { + s := intstr.FromInt(2) + intOrPercent = &s + } else { + s := intstr.FromInt(1) + intOrPercent = &s + } + } + return intstr.GetValueFromIntOrPercent(intOrPercent, replicas, true) +} +``` + +##### Sort PodMigrationJob + +- Pods with higher QoS requirements are given priority, LSE > LSR > LS > BE +- Pods with higher priority will be processed first +- The higher migration priority will be processed first +- If the Pod has already initiated a migration job in the past and it fails, sort by the number of times. The lower the number of times, the priority will be given to processing +- If the workload where the Pod is located has been descheduled for a certain number of times in the past, it is sorted according to the number of times. The lower the number of times, the priority will be processed. +- Sort by the number of replicas being migrated by the workload. The lower the number of replicas being migrated, the priority will be given to processing. + +##### Execute PodMigrationJob + +- Update PodMigrationJobStatus.Phase to Running to trigger the PodMigrationJob controller reconcile these jobs +- PodMigrationJob controller reconciles process: + - If the mode of PodMigrationJob is `EvictionDirectly`, just delete the Pod through the delete method that configured in PodMigrationJob controller. And update the phase of PodMigrationJob to Success. + - If not specified ReservationOptions.ReservationRef, create the Reservation instance by the reservation template or Pod spec to reserve resources. And updates the created Reservation instance to the ReservationOptions.ReservationRef. + - Check the status of Reservation to determine whether reserve resource successfully. + - If failed to reserve, abort the PodMigrationJob and update the phase of PodMigrationJob to Fail + - If successfully reserve, delete the Pod through the delete method that configured in PodMigrationJob controller. + - Check the Reservation status to determine whether the Reservation consumed. + - If Reservation consumed, tracks the status of Reservation and update the status to PodMigrationJob + - Update phase of PodMigrationJob to Success. + +##### Migration Stability mechanism + +- Support for disabling this capability by configuration +- Supports a simple central flow control mechanism to limit the number of migrations over a period of time. + +See the Configuration section for more details + +#### Controller Configuration + +User can configure the `MigrationControllerArgs` through Koordinator Descheduler ConfigMap. + +```go +// MigrationControllerArgs holds arguments used to configure the MigrationController +type MigrationControllerArgs struct { + metav1.TypeMeta + + // DryRun means only execute the entire migration logic except create Reservation or Delete Pod + // Default is false + DryRun bool `json:"dryRun,omitempty"` + + // EvictFailedBarePods allows pods without ownerReferences and in failed phase to be evicted. 
+ EvictFailedBarePods bool `json:"evictFailedBarePods"` + + // EvictLocalStoragePods allows pods using local storage to be evicted. + EvictLocalStoragePods bool `json:"evictLocalStoragePods"` + + // EvictSystemCriticalPods allows eviction of pods of any priority (including Kubernetes system pods) + EvictSystemCriticalPods bool `json:"evictSystemCriticalPods"` + + // IgnorePVCPods prevents pods with PVCs from being evicted. + IgnorePvcPods bool `json:"ignorePvcPods"` + + // LabelSelector sets whether to apply label filtering when evicting. + // Any pod matching the label selector is considered evictable. + LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty"` + + // FlowControlQPS controls the number of arbitrations per second + FlowControlQPS string `json:"flowControlQPS,omitempty"` + // FlowControlBurst is the maximum number of tokens + FlowControlBurst int32 `json:"flowControlBurst,omitempty"` + + // MaxMigratingPerNode represents he maximum number of pods that can be migrating during migrate per node. + MaxMigratingPerNode *int32 `json:"maxMigratingPerNode,omitempty"` + + // MaxMigratingPerNamespace represents he maximum number of pods that can be migrating during migrate per namespace. + MaxMigratingPerNamespace *int32 `json:"maxMigratingPerNamespace,omitempty"` + + // MaxMigratingPerWorkload represents he maximum number of pods that can be migrating during migrate per workload. + // Value can be an absolute number (ex: 5) or a percentage of desired pods (ex: 10%). + MaxMigratingPerWorkload *intstr.IntOrString `json:"maxMigratingPerWorkload,omitempty"` + + // MaxUnavailablePerWorkload represents he maximum number of pods that can be unavailable during migrate per workload. + // The unavailable state includes NotRunning/NotReady/Migrating/Evicting + // Value can be an absolute number (ex: 5) or a percentage of desired pods (ex: 10%). + MaxUnavailablePerWorkload *intstr.IntOrString `json:"maxUnavailablePerWorkload,omitempty"` + + // EvictionPolicy represents how to delete Pod, support "Delete" and "Eviction", default value is "Eviction" + EvictionPolicy string `json:"evictionPolicy,omitempty"` + // DefaultDeleteOptions defines options when deleting migrated pods and preempted pods through the method specified by EvictionPolicy + DefaultDeleteOptions *metav1.DeleteOptions `json:"defaultDeleteOptions,omitempty"` +} + +``` \ No newline at end of file diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/resource-reservation.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/resource-reservation.md new file mode 100644 index 000000000..7fa73c84f --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/resource-reservation.md @@ -0,0 +1,245 @@ +# Resource Reservation + +## Summary + +A scheduling mechanism and its API is provided to reserve node resources for pods may not be created yet. + +## Motivation + +Pods are fundamental units for allocating node resources in Kubernetes, which bind resource requirements with business logic. The scheduler is not able to reserve node resources for specific pods or workloads. We may try using a [fake pod](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#how-can-i-configure-overprovisioning-with-cluster-autoscaler) to prepare resources by the preemption mechanism. However, fake pods can be preempted by any scheduled pods with higher priorities, which make resources get scrambled unexpectedly. 
+ +In Koordinator, a resource reservation mechanism is proposed to enhance scheduling and especially benefits scenarios below: + +1. Preemption: Existing preemption does not guarantee that only preempting pods can allocate preempted resources. With a reservation, the scheduler should be able to "lock" resources preventing from allocation of other pods with the same or higher priority. +2. De-scheduling: For the descheduler, it is better to ensure sufficient resources with the reservation before pods get rescheduled. Otherwise, rescheduled pods may not be runnable anymore and make the belonging application disrupted. +3. Horizontal scaling: Using reservation to achieve more deterministic horizontal scaling. e.g. Submit a reservation and make sure it is available before scaling up replicas. +4. Resource Pre-allocation: Sometimes we want to pre-allocate node resources for future resource demands even if the resources are not currently allocatable. Reservation can help with this and it should make no physical cost. + +### Goals + +- Define the basic API of resource reservation for *Motivations<1,2,3>*, extensible for supporting *Motivation<4>* in the future. +- Provide a scheduler plugin that implements above reservation mechanism. + +### Non-Goals/Future Work + +- Detailed design of reservative preemption/descheduler/horizontal scaler/pre-allocation. +- Modify kubelet admission control for reservation objects. + +## Proposal + +### User Stories + +#### Story 1 + +As a Kubernetes developer, I want to enhance the current **preemption** mechanism since preempted resources may be allocated by pods other than the preemptor. The scheduler can create a reservation for the preempting pods, so the ownership of preempted resources can be guaranteed, making the preemption more reliable. + +#### Story 2 + +As a cluster administrator, I want to use **descheduler** to migrate pods that are placed abnormally to somewhere they could "live better" and fulfill orchestration requirements of the app. e.g. Move pods on a over-utilized node to idler nodes and bind CPUs of same NUMA node. Reservations can be created before rescheduling pods, helping ensure there are sufficient resources and well placement. + +#### Story 3 + +As an application administrator, I want to make the **horizontal scaling** of my app more deterministic by submitting reservations before a scale-up. Besides, I can also reserve resources after a scale-down for future demands. It is useful especially when we want a guaranteed scale-up of applications for the coming business peak. + +#### Story 4 + +As a cluster administrator, I want to **pre-allocate** node resources for future usage no matter whether they are available now or not. I want to allocate the future free resources but do not disrupt the running of scheduled pods. Reservation can be made to pre-allocate resources since it makes no physical cost to the node. It may be in a `Waiting` state. When there is enough space for the reservation, it will become `Available` for the owner pods' scheduling. + +### API + +In this section, a Custom Resource Definition (CRD) named `Reservation` is proposed to allow the scheduler to reserve node resources for specific pods. + +![image](/img/resource-reservation.svg) + +```go +// Reservation objects are non-namespaced. +// It can reserve resources for pods of any namespace. Any affinity/anti-affinity of reservation scheduling can be +// specified in the pod template. 
+type Reservation struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + Spec ReservationSpec `json:"spec,omitempty"` + Status ReservationStatus `json:"status,omitempty"` +} + +type ReservationSpec struct { + // Template defines the scheduling requirements (resources, affinities, images, ...) processed by the scheduler just + // like a normal pod. + // If the `template.spec.nodeName` is specified, the scheduler will not choose another node but reserve resources on + // the specified node. + Template *corev1.PodTemplateSpec `json:"template,omitempty"` + // Specify the owners who can allocate the reserved resources. + // Multiple owner selectors and ORed. + Owners []ReservationOwner `json:"owners,omitempty"` + // By default, the resources requirements of reservation (specified in `template.spec`) is filtered by whether the + // node has sufficient free resources (i.e. ReservationRequest < NodeFree). + // When `preAllocation` is set, the scheduler will skip this validation and allow overcommitment. The scheduled + // reservation would be waiting to be available until free resources are sufficient. + // NOTE: Not supported in v0.6. + PreAllocation bool `json:"preAllocation,omitempty"` + // Time-to-Live period for the reservation. + // `expires` and `ttl` are mutually exclusive. If both `ttl` and `expires` are not specified, a very + // long TTL will be picked as default. Set 0 to disable the expiration. + TTL *metav1.Duration `json:"ttl,omitempty"` + // Expired timestamp when the reservation expires. + // `expires` and `ttl` are mutually exclusive. Defaults to being set dynamically at runtime based on the `ttl`. + Expires *metav1.Time `json:"expires,omitempty"` +} + +type ReservationStatus struct { + // The `phase` indicates whether is reservation is waiting for process (`Pending`), available to allocate + // (`Available`) or timeout/expired to get cleanup (Failed). + Phase ReservationPhase `json:"phase,omitempty"` + // The `conditions` indicate the messages of reason why the reservation is still pending. + Conditions []ReservationCondition `json:"conditions,omitempty"` + // Current resource owners which allocated the reservation resources. + CurrentOwners []corev1.ObjectReference `json:"currentOwners,omitempty"` + // Name of node the reservation is scheduled on. + NodeName string `json:"nodeName,omitempty"` + // Resource reserved and allocatable for owners. + Allocatable corev1.ResourceList `json:"allocatable,omitempty"` + // Resource allocated by current owners. + Allocated corev1.ResourceList `json:"allocated,omitempty"` +} + +type ReservationOwner struct { + // Multiple field selectors are ANDed. + Object *corev1.ObjectReference `json:"object,omitempty"` + Controller *ReservationControllerReference `json:"controller,omitempty"` + LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty"` +} + +type ReservationControllerReference struct { + // Extend with a `namespace` field for reference different namespaces. + metav1.OwnerReference `json:",inline"` + Namespace string `json:"namespace,omitempty"` +} + +type ReservationPhase string + +const ( + // ReservationPending indicates the Reservation has not been processed by the scheduler or is unschedulable for + // some reasons (e.g. the resource requirements cannot get satisfied). + ReservationPending ReservationPhase = "Pending" + // ReservationAvailable indicates the Reservation is both scheduled and available for allocation. 
+ ReservationAvailable ReservationPhase = "Available" + // ReservationWaiting indicates the Reservation is scheduled, but the resources to reserve are not ready for + // allocation (e.g. in pre-allocation for running pods). + ReservationWaiting ReservationPhase = "Waiting" + // ReservationFailed indicates the Reservation is failed to reserve resources, due to expiration or marked as + // unavailable, which the object is not available to allocate and will get cleaned in the future. + ReservationFailed ReservationPhase = "Failed" +) + +type ReservationCondition struct { + Type ReservationConditionType `json:"type,omitempty"` + Status ConditionStatus `json:"status,omitempty"` + Reason string `json:"reason,omitempty"` + Message string `json:"message,omitempty"` + LastProbeTime metav1.Time `json:"lastProbeTime,omitempty"` + LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty"` +} + +type ReservationConditionType string + +const ( + ReservationConditionScheduled ReservationConditionType = "Scheduled" + ReservationConditionReady ReservationConditionType = "Ready" +) + +type ConditionStatus string + +const ( + ConditionStatusTrue ConditionStatus = "True" + ConditionStatusFalse ConditionStatus = "False" + ConditionStatusUnknown ConditionStatus = "Unknown" +) + +const ( + ReasonReservationScheduled = "Scheduled" + ReasonReservationUnschedulable = "Unschedulable" + ReasonReservationAvailable = "Available" + ReasonReservationExpired = "Expired" +) +``` + +### Implementation Details + +#### Reservation Plugin + +##### Schedule Reservations + +A `Reservation` object has its scheduling requirements like a pod. Ideally, A `Reservation` object should get processed directly by the scheduler like a pod. However, it can require a series of modifications on [scheduling framework](https://kubernetes.io/docs/concepts/scheduling-eviction/scheduling-framework/), losing the compatibility with standard kube-scheduler, kubelet, autoscaler, etc. In the reservation plugin, we fake one *reservation pod* for one `Reservation` inside the scheduler to fulfill general scheduling plugins (noderesources, nodeaffinity, tainttolerations, ...). The scheduling framework can handle `Reservation` objects by processing fake pods in both [scheduling cycle and binding cycle](https://kubernetes.io/docs/concepts/scheduling-eviction/scheduling-framework/#scheduling-cycle-binding-cycle). + +A fake pod inside the scheduler can construct the same affinity/anti-affinity constraints as owner pods, which may change the reservation result. To handle this problem, koord-scheduler extends the framework to skip check of pod affinity for existing reservations in the `Filter` phase. + +A reservation specified `PreAllocation` intends to pre-allocate resources on nodes. The scheduler will skip its filtering of node resources in the scheduling cycle. However, the scheduled reservation will be `Waiting` to be `Available` until there are enough resources to fulfill its requests. + +If all nodes are unscheduled for the reservation, the scheduler keeps its status as `Pending` and sets `Conditions` with the failure message. + +Once the scheduling decision has been made, the corresponding `Reservation` object is updated with a new status indicating whether the reservation succeeded or not. The fake pod does not expose to other components, and the kubelet without modification does not perceive a `Reservation` assigned. 
Fortunately, a `Reservation` does not need to be executable on the node, so existing containers can keep running as usual without additional admissions.
+
+If a reservation has set the `nodeName` (inside the `template` field), the scheduler is responsible for checking whether the node can fulfill the reservation, since the kubelet does not do admissions for the reservation.
+
+##### Allocate Reserved Resources
+
+We say a reservation is *allocatable* for a pod if:
+
+1. The reservation is available.
+2. The pod matches the reservation owner spec.
+3. There are sufficient free resources in the reservation to fulfill the pod.
+
+When the reservation plugin is enabled, the scheduler checks for every pod being scheduled whether there are allocatable reservations on a node. With a `Score` plugin implemented, the scheduler prefers to schedule pods on nodes which have more allocatable reserved resources.
+
+When a pod is scheduled on a node with allocatable reservations, it allocates resources belonging to one of the reservations. To pick one, we choose the reservation from which the most reserved resources can be allocated (i.e. MostAllocated). The scheduler also annotates the pod with the reservation info.
+
+##### Expiration and Cleanup
+
+When a reservation has existed for a long time, exceeding the `TTL` or `Expires`, the scheduler updates its status as `Expired`. For expired reservations, the scheduler will clean them up after a custom garbage collection period.
+
+When a node is deleted, the available and waiting reservations on the node should be marked as `Failed` since they are not allocatable any more.
+
+#### Use Cases
+
+To generally reserve node resources, submit a `Reservation` and set the pod template in the field `spec.template`. Then the koord-scheduler will update this `Reservation` with the scheduling result and the resources will get reserved.
+
+To be more specific,
+
+- `spec.template` specifies the fundamental resource requirements of a reservation. The scheduler will schedule the fake pod based on the template.
+- `spec.owners` specifies which kinds of pods can use the reservation.
+- `spec.ttl` and `expires` specify the expiration for the reservation.
+- `spec.preAllocation` indicates whether the scheduler should skip filtering nodes by the reservation's resource requirements. When set, pre-allocation of node resources is allowed, and the reservation does not become available until there are sufficient free resources.
+- `status.phase` is marked as `Pending` when the Reservation is created, and it is marked as `Available` when the Reservation is successfully scheduled.
+- `status.conditions` shows why the reservation is unscheduled or failed.
+- When a Reservation is `Available` on the node, only the specified pods can allocate the reserved resources.
+
+##### Usage in Preemption
+
+The [Priority Preemption](https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption/#preemption) happens in the PostFilter phase, trying to make preemptor pods schedulable by evicting low-priority pods. When a pod succeeds in preemption, the pod `status` is patched with a *nominated node* on which the scheduler performed the eviction. However, the preemptor's nominated node is not always the same as the finally scheduled node, since the scheduler does not reserve resources for the preemptor.
+To ensure the preempted resources go to the preemptor, the scheduler can first create a reservation that sets `owners` to the preemptor pod along with the relevant affinity rules for reserving the resources of the preempted pods. Then the scheduler evicts the pods, and the reservation becomes `Available` once the resources are released. Finally, the preemptor pods can get scheduled on the nodes with the preempted resources reserved.
+
+##### Usage in Descheduling
+
+Before a pod is rescheduled, the descheduler can create a reservation that sets `template` and `owners` for the candidate. When the reservation becomes `Available`, the descheduler can assign the pod to allocate the reserved resources. This solves the problem in which the rescheduled pod has already stopped on the old node but cannot run on the new node. Moreover, the descheduler can migrate resources between pods by setting the `preAllocation` field.
+
+##### Usage in Pre-allocation
+
+Reservations with `preAllocation` specified allow users to pre-allocate node resources from running pods. The `status.phase` of the reservation is set as `Waiting` until the resources are released, indicating that its availability is conditional. Once the referenced pods have terminated, the `phase` becomes `Available` for owners, and the pre-allocation succeeds.
+
+### Risks and Mitigations
+
+A kubelet without any modification may ignore `Reservation` objects in predicate admission, which increases the chance of unexpected overcommitment at nodes. A `Reservation` does not require any physical resources to be executable, so the overcommitment is mainly a problem only when pods get scheduled with a `Reservation` and start to run, which is somewhat easier to mitigate since the kubelet does admit these pods. To further decrease the possibility of unexpected overcommitment or pod admission failures, we could use resource estimation for in-flight pods, balance pods to the nodes with fewer reserved resources, etc.
+
+## Unsolved Problems
+
+As stated above, a `Reservation` can generate the same pod affinity/anti-affinity rules as the owner pods. The problem gets resolved in the koord-scheduler by extending the scheduling framework, but it still limits the standard kube-scheduler.
+
+## Alternatives
+
+### Use a `pause` pod with a low priority to reserve resources
+
+Reserving resources with [`pause` pods with very low assigned priority](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#how-can-i-configure-overprovisioning-with-cluster-autoscaler) does work when the preemption can be precisely enabled for specific pods. In the example of the cluster autoscaler, `pause` pods are helpful when we need to overprovision resources to prevent the CA from scaling down idle nodes. However, a `pause` pod has no reservation guarantee except `priority`. As declared above, many scenarios require reservations to rely on other pod characteristics (e.g. names, namespaces, labels, priorityClass), where `pause` pods cannot meet the demands.
+
+## References
+
+1.
[Kueue Pod Resource Reservation](https://docs.google.com/document/d/1sbFUA_9qWtorJkcukNULr12FKX6lMvISiINxAURHNFo) diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/runtime-proxy.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/runtime-proxy.md new file mode 100644 index 000000000..ab26955e9 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/designs/runtime-proxy.md @@ -0,0 +1,136 @@ +# RuntimeProxy + +## 摘要 + +KoordRuntimeProxy 充当 Kubelet 和 Containerd 之间的代理( Dockershim 场景下是 Dockerd ),它用于拦截 CRI 请求,并应用一些资源管理策略, +如混合工作负载编排场景下按实例优先级设置不同的 cgroup 参数,针对最新的 Linux 内核、CPU 架构应用新的隔离策略等。 + +这里涉及两个组件,KoordRuntimeProxy 和 RuntimePlugins。 + +![image](/img/koord-runtime-proxy-architecture.svg) + +## 目标 + +- 增强基于 QoS 的调度的资源管理。 +- 为 CRI 不支持的新隔离特性提供接口。 + +## 组件 + +### KoordRuntimeProxy + +KoordRuntimeProxy 负责在 Pod 的生命周期内拦截请求,例如 RunPodSandbox、CreateContainer 等, +在请求从后端 Containerd(Dockerd) 到 Kubelet 之间的传输过程中,会调用 RuntimePlugins 做资源隔离策略。 +KoordRuntimeProxy 提供了一个隔离策略执行框架,允许注册的自定义插件执行指定的隔离策略,这些插件称为 RuntimePlugins。 KoordRuntimeProxy 本身不执行任何隔离策略。 + +### RuntimePlugins + +RuntimePlugins 将事件(如 RunPodSandbox 等)注册到 KoordRuntimeProxy 并在事件发生时接收通知。 +RuntimePlugins 应该根据通知消息完成资源隔离策略,然后响应给 KoordRuntimeProxy,KoordRuntimeProxy 将根据插件的响应决定将请求转移到后端 Containerd 或丢弃。 + +如果没有注册 RuntimePlugins,KoordRuntimeProxy 将成为 Kubelet 和 Containerd 之间的透明代理。 + +## 架构 + +![image](/img/koord-runtime-proxy-design.svg) + +KoordRounmeProxy 有4个主要组件。 + +### CRI Server + +KoordRuntimeProxy 作为 Kubelet 和 Containerd 之间的代理,充当 Kubelet 的 CRI 服务器(Dockershim 场景下的 Http 服务器)。它应该拦截来自 Kubelet 的所有请求,并在与后端 Containerd(Dockerd) 调用之前和之后生成与插件调用的协议。 + +### Plugins Manager + +PluginsManager 负责动态解析来自 `/etc/runtime/hookserver.d` 的插件注册信息。 + +### Runtime Dispatcher + +RuntimeDispatcher 旨在管理与插件的通信。 + +### Store + +作为代理,KoordRuntimeProxy 最好设计为无状态,但有时候现实并不完美。 +以 StartContainer hook 为例,CRI StartContainerRequest 中只有 ContainerID,这不足以让插件调整策略,因为插件可能不会在本地存储 Pod/Container 信息(如 Meta、Priority)。所以 KoordRuntimeProxy 应该在 RunPodSandbox/CreateContainer 阶段存储 Pod/Container 信息。当 StartContainer 请求到来时,KoordRuntimeProxy 可以通过 ContainerID 获取 Pod/Container 信息,然后使用 Pod/Container 信息调用插件。 + +有了 Store,每次 KoordRuntimeProxy 调用插件都会有 Pod/Container 信息,所以插件不需要特别存储 Pod/Container 信息,插件可以设计成无状态的。 + +考虑到性能,Store 位于内存中,不会产生外部 IO 到磁盘。 + +## Runtime Plugins + +### 如何注册插件 +所有的插件配置文件都应该放在 `/etc/runtime/hookserver.d` 并带有 `.json` 后缀。您可以使用 RuntimeProxy 注册 Koordlet 实现的插件: + +1. touch /etc/runtime/hookserver.d/koordlet.json +2. 将以下内容复制到 /etc/runtime/hookserver.d/koordlet.json +``` +{ + "remote-endpoint": "/var/run/koordlet/koordlet.sock", + "failure-policy": "Ignore", + "runtime-hooks": [ + "PreRunPodSandbox", + "PreCreateContainer", + "PreStartContainer" + ] +} +``` + + +涉及3个字段: +- remote-endpoint: KoordRuntimeProxy 与插件对话端点,由插件生成。 +- failure-policy: 调用插件失败时的策略,失败或忽略,默认为忽略。 +- runtime-hooks: 目前有7个钩点: + 1. PreRunPodSandbox + 2. PreCreateContainer + 3. PreStartContainer + 4. PostStartContainer + 5. PreUpdateContainerResources + 6. PostStopContainer + 7. 
PostStopPodSandbox + +带有前缀 “Pre” 的挂钩点表示在将请求传输到 Contianerd(Dockerd) 之前调用插件。带有前缀 “Post“ 的挂钩点意味着在收到来自 Containerd(Dockerd) 的响应后调用插件。插件提供者可以将任何钩子组合设置为“运行时钩子”。 + +### KoordRunmeProxy 和 Plugins 之间的协议 +[Protocols](https://github.com/koordinator-sh/koordinator/blob/main/apis/runtime/v1alpha1/api.proto) + +### Runtime Plugins 例子 +[koordlet-runtime-plugin-design](https://github.com/koordinator-sh/koordinator/blob/main/docs/design-archive/koordlet-runtime-hooks.md) + +## 安装 + +### 源代码安装 +获取源代码:`git clone https://github.com/koordinator-sh/koordinator.git` + +构建:`cd koordinator; make build-koord-runtime-proxy` + +### 包安装 +下载最新发布的程序包:`https://github.com/koordinator-sh/koordinator/releases` + +### 配置 Kubelet +在 Containerd 场景下,为了让 koord-runtime-proxy 成为 Kubelet 和 Containerd 之间的代理,Kubelet 的参数需要修改如下: +``` +kubelet --container-runtime=remote --container-runtime-endpoint=unix:///var/run/koord-runtimeproxy/runtimeproxy.sock +``` + +在 Docker 场景下,为了让 koord-runtime-proxy 成为 Kubelet 和 Dockerd 之间的代理,Kubelet 的参数需要修改如下: +``` +kubelet --docker-endpoint=unix:///var/run/koord-runtimeproxy/runtimeproxy.sock +``` + +### 配置 KoordRuntimeProxy +首先,请确保您的运行时后端是 Containerd 或 Dockerd。 + +在 Containerd 场景下,koord-runtime-proxy 可以使用以下命令设置: +``` +koord-runtime-proxy --remote-runtime-service-endpoint= + --remote-image-service-endpoint= +``` +如果 Containerd 在默认 `/var/run/koord-runtimeproxy/runtimeproxy.sock` 上监听 CRI 请求,koord-runtime-proxy 可以通过以下方式设置: +``` +koord-runtime-proxy +``` + +在 Docker 场景下,koord-runtime-proxy 应该使用附加参数 `--backend-runtime-mode Docker` 设置,并且没有 `remote-image-service-endpoint`: +``` +koord-runtime-proxy --backend-runtime-mode=Docker --remote-runtime-service-endpoint= +``` diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/installation.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/installation.md new file mode 100644 index 000000000..15a71e334 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/installation.md @@ -0,0 +1,231 @@ +# 安装 + +Koordinator 依赖 **Kubernetes version >= 1.18**。 + +Koordinator 需要从 kubelet 只读端口收集指标(默认设置为禁用)。 +更多信息 [here](https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/). + +为了最好的体验,koordinator 推荐 **linux kernel 4.19** 或者更高版本。 + + +## 使用 Helm 安装 + +Koordinator 可以使用 Helm v3.5+ 安装, Helm 是一个简单的命令行工具,更多信息 [here](https://github.com/helm/helm/releases). + +```bash +# Firstly add koordinator charts repository if you haven't do this. +$ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/ + +# [Optional] +$ helm repo update + +# Install the latest version. +$ helm install koordinator koordinator-sh/koordinator --version 1.3.0 +``` + +## 使用 Helm 升级 + +```bash +# Firstly add koordinator charts repository if you haven't do this. +$ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/ + +# [Optional] +$ helm repo update + +# Upgrade the latest version. +$ helm upgrade koordinator koordinator-sh/koordinator --version 1.3.0 [--force] +``` + +注意: + +1. 升级前,为确保你了解新版本中的重大更改,你 **必须** 先阅读 [变更日志](https://github.com/koordinator-sh/koordinator/blob/master/CHANGELOG.md)。 +2. 
如果你想删除或者新增旧版本中的 Chart 参数,推荐在 `helm upgrade` 命令中添加参数 `--reset-values` 。否则,你应该使用 `--reuse-values` 参数来恢复上一个版本的值。 + +## 可选:手动下载 Charts + +如果你在生产环境中连接到 `https://koordinator-sh.github.io/charts/` 时遇到问题,你可能需要从 [此处](https://github.com/koordinator-sh/charts/releases) 手动下载 Charts 进行安装或升级。 + +```bash +$ helm install/upgrade koordinator /PATH/TO/CHART +``` + +## 启用 NRI 资源管理模式 + +### 前置条件 + +- Containerd >= 1.7.0 且配置启用 NRI。请确保 NRI 已在 containerd 中启用,否则请参考 [Enable NRI in Containerd](https://github.com/containerd/containerd/blob/main/docs/NRI.md)。 +- Koordinator >= 1.3 + +### 配置方式 + +NRI 资源管理模式是*默认启用*的。你无需修改 koordlet 配置就可以使用它,也可以通过设置 `enable-nri-runtime-hook=false` 的 koordlet 启动参数来禁用它。当它的前置条件不满足时,启用也不会影响其他功能。 + +## 安装 koord-runtime-proxy + +koord-runtime-proxy 充当 Kubelet 和 Containerd 之间的代理(Dockershim 场景下的 Dockerd),旨在拦截 CRI 请求, 并应用一些资源管理策略,比如在混合工作负载编排场景下通过 Pod 优先级设置不同的 CGroup 参数,为最新的 Linux 内核应用新的隔离策略, CPU 架构等等。 + +### 1、下载二进制文件 + +从 Github 下载: +```bash +$ # select the version +$ wget https://github.com/koordinator-sh/koordinator/releases/download/v1.3.0/koord-runtime-proxy_1.3.0.linux_x86_64 -O koord-runtime-proxy +$ chmod +x koord-runtime-proxy +``` + +或者,你可以从源代码开始构建: +```bash +$ git clone https://github.com/koordinator-sh/koordinator.git +$ cd koordinator +$ make build-koord-runtime-proxy +``` + +### 2、设置 koord-runtime-proxy + +首先,请确保你的运行时后端是 Containerd 或 Dockerd。 + +在 Containerd 场景下,如果 Containerd 在默认的 `/var/run/containerd/containerd.sock` 监听 CRI 请求,koord-runtime-proxy 可以这样设置(无需任何参数): + +``` +koord-runtime-proxy +``` + +或者使用以下命令设置: + +``` +koord-runtime-proxy \ + --remote-runtime-service-endpoint= \ + --remote-image-service-endpoint= +``` + +在 Docker 的场景下,koord-runtime-proxy 应该使用附加参数设置 `--backend-runtime-mode Docker`,无需 `remote-image-service-endpoint`: + +``` +koord-runtime-proxy \ + --backend-runtime-mode=Docker \ + --remote-runtime-service-endpoint= +``` + +koord-runtime-proxy 将监听 `/var/run/koord-runtimeproxy/runtimeproxy.sock`。 + +### 3、设置 Kubelet + +要使 koord-runtime-proxy 成为 Kubelet 和 Containerd 之间的代理,应修改 Kubelet 参数,如下所示: + +``` +kubelet \ + --container-runtime=remote \ + --container-runtime-endpoint=unix:///var/run/koord-runtimeproxy/runtimeproxy.sock +``` + +在 Docker 的场景下, 应修改 Kubelet 参数如下: + +``` +kubelet --docker-endpoint=unix:///var/run/koord-runtimeproxy/runtimeproxy.sock +``` + + +## 可选 + +请注意,直接安装这个 Chart 意味着使用 Koordinator 的默认模板值。 + +如果将其部署到生产集群中,或者你想要配置 `feature-gates`,你可能需要设置特定配置。 + +### 可选: Chart 参数 + +下表列出了 Chart 可配置参数及其默认值。 + +| Parameter | Description | Default | +| ----------------------------------------- | ---------------------------------------------------------------- |---------------------------------| +| `featureGates` | Feature gates for Koordinator, empty string means all by default | ` ` | +| `installation.namespace` | namespace for Koordinator installation | `koordinator-system` | +| `installation.createNamespace` | Whether to create the installation.namespace | `true` | +| `imageRepositoryHost` | Image repository host | `ghcr.io` | +| `manager.log.level` | Log level that koord-manager printed | `4` | +| `manager.replicas` | Replicas of koord-manager deployment | `2` | +| `manager.image.repository` | Repository for koord-manager image | `koordinatorsh/koord-manager` | +| `manager.image.tag` | Tag for koord-manager image | `v1.3.0` | +| `manager.resources.limits.cpu` | CPU resource limit of koord-manager container | `1000m` | +| `manager.resources.limits.memory` | Memory resource limit of koord-manager container | `1Gi` | +| `manager.resources.requests.cpu` | CPU resource 
request of koord-manager container | `500m` | +| `manager.resources.requests.memory` | Memory resource request of koord-manager container | `256Mi` | +| `manager.metrics.port` | Port of metrics served | `8080` | +| `manager.webhook.port` | Port of webhook served | `9443` | +| `manager.nodeAffinity` | Node affinity policy for koord-manager pod | `{}` | +| `manager.nodeSelector` | Node labels for koord-manager pod | `{}` | +| `manager.tolerations` | Tolerations for koord-manager pod | `[]` | +| `manager.resyncPeriod` | Resync period of informer koord-manager, defaults no resync | `0` | +| `manager.hostNetwork` | Whether koord-manager pod should run with hostnetwork | `false` | +| `scheduler.log.level` | Log level that koord-scheduler printed | `4` | +| `scheduler.replicas` | Replicas of koord-scheduler deployment | `2` | +| `scheduler.image.repository` | Repository for koord-scheduler image | `koordinatorsh/koord-scheduler` | +| `scheduler.image.tag` | Tag for koord-scheduler image | `v1.3.0` | +| `scheduler.resources.limits.cpu` | CPU resource limit of koord-scheduler container | `1000m` | +| `scheduler.resources.limits.memory` | Memory resource limit of koord-scheduler container | `1Gi` | +| `scheduler.resources.requests.cpu` | CPU resource request of koord-scheduler container | `500m` | +| `scheduler.resources.requests.memory` | Memory resource request of koord-scheduler container | `256Mi` | +| `scheduler.port` | Port of metrics served | `10251` | +| `scheduler.nodeAffinity` | Node affinity policy for koord-scheduler pod | `{}` | +| `scheduler.nodeSelector` | Node labels for koord-scheduler pod | `{}` | +| `scheduler.tolerations` | Tolerations for koord-scheduler pod | `[]` | +| `scheduler.hostNetwork` | Whether koord-scheduler pod should run with hostnetwork | `false` | +| `koordlet.log.level` | Log level that koordlet printed | `4` | +| `koordlet.image.repository` | Repository for koordlet image | `koordinatorsh/koordlet` | +| `koordlet.image.tag` | Tag for koordlet image | `v1.3.0` | +| `koordlet.resources.limits.cpu` | CPU resource limit of koordlet container | `500m` | +| `koordlet.resources.limits.memory` | Memory resource limit of koordlet container | `256Mi` | +| `koordlet.resources.requests.cpu` | CPU resource request of koordlet container | `0` | +| `koordlet.resources.requests.memory` | Memory resource request of koordlet container | `0` | +| `koordlet.enableServiceMonitor` | Whether to enable ServiceMonitor for koordlet | `false` | +| `webhookConfiguration.failurePolicy.pods` | The failurePolicy for pods in mutating webhook configuration | `Ignore` | +| `webhookConfiguration.timeoutSeconds` | The timeoutSeconds for all webhook configuration | `30` | +| `crds.managed` | Koordinator will not install CRDs with chart if this is false | `true` | +| `imagePullSecrets` | The list of image pull secrets for koordinator image | `false` | + +使用 `helm install` 或 `helm upgrade` 的 `--set key=value[,key=value]` 参数指定每个参数。 + +### 可选: feature-gate + +Feature-Gate 控制 Koordinator 中的一些有影响力的功能: + +| Name | Description | Default | Effect (if closed) | +| ------------------------- | ---------------------------------------------------------------- | ------- | -------------------------------------- | +| `PodMutatingWebhook` | Whether to open a mutating webhook for Pod **create** | `true` | Don't inject koordinator.sh/qosClass, koordinator.sh/priority and don't replace koordinator extend resources ad so on | +| `PodValidatingWebhook` | Whether to open a validating webhook for Pod **create/update** | 
`true` | It is possible to create some Pods that do not conform to the Koordinator specification, causing some unpredictable problems | + + +如果要配置 feature-gate ,只需在安装或升级时设置参数即可。如: + +```bash +$ helm install koordinator https://... --set featureGates="PodMutatingWebhook=true\,PodValidatingWebhook=true" +``` + +如果要启用所有 feature-gates ,请将参数设置为 `featureGates=AllAlpha=true` 。 + +### 可选: 中国本地镜像 + +如果你在中国并且无法从官方 DockerHub 拉取镜像,你可以使用托管在阿里云上的镜像仓库: + +```bash +$ helm install koordinator https://... --set imageRepositoryHost=registry.cn-beijing.aliyuncs.com +``` + +## 最佳实践 + +### AWS EKS 的安装参数 + +在 EKS 上使用自定义 CNI(例如 Weave 或 Calico)时,默认情况下无法访问 webhook。发生这种情况是因为在 EKS 上控制平面无法配置运行自定义 CNI ,因此控制平面和工作节点之间的 CNI 不同。 + +为了解决这个问题,使用 helm install 或 upgrade 时设置 `--set manager.hostNetwork=true`,webhook 可以在主机网络中运行。 + +## 卸载 + +请注意,这将导致 Koordinator 创建的所有资源,包括 Webhook 配置、Services、Namespace、CRD 和由 Koordinator 控制器管理的 CR 实例,都被删除! +请在充分了解后果的情况下才这样做。 + +卸载通过 Chart 安装的 Koordinator : + +```bash +$ helm uninstall koordinator +release "koordinator" uninstalled +``` diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/introduction.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/introduction.md new file mode 100644 index 000000000..784b88edd --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/introduction.md @@ -0,0 +1,48 @@ +--- +title: 简介 +slug: / +--- + +# 简介 + +欢迎来到 Koordinator! + +## 概述 + +Koordinator 是一个基于 QoS 的 Kubernetes 混合工作负载调度系统。它旨在提高对延迟敏感的工作负载和批处理作业的运行时效率和可靠性,简化与资源相关的配置调整的复杂性,并增加 Pod 部署密度以提高资源利用率。 + + +## 关键特性 + +Koordinator 通过提供以下功能增强了在 Kubernetes 中管理工作负载的用户体验: + +- 精心设计的优先级和 QoS 机制,可将不同类型的工作负载混跑在集群中,并在单个节点上运行不同类型的 Pod 。 +- 允许资源超卖以实现高资源利用率,但仍通过利用应用程序分析机制来满足 QoS 保证。 +- 细粒度的资源协调和隔离机制,以提高延迟敏感的工作负载和批处理作业的效率。 +- 灵活的作业调度机制,支持特定领域的工作负载,例如大数据、人工智能、音频和视频。 +- 一整套用于监控、故障排除和操作的工具。 + + +## Koordinator vs 其他概念 + +### Koordinator QoS vs Kubernetes QoS + +Kubernetes 提供三种类型的 QoS: Guaranteed/Burstable/BestEffort,其中 Guaranteed/Burstable 被广泛使用 BestEffort 很少使用。Koordinator 与 Kubernetes QoS 兼容,并且对每种类型都有许多增强功能。为了避免干扰原生 QoS 语义,Koordinator 引入了一个独立的字段 `koordinator.sh/qosClass` 来描述混部 QoS。该 QoS 描述了在混部场景中节点上运行的 Pod 的服务质量。它是混合系统最关键的语义。 + +Koordinator 与 Kubernetes QoS 兼容,并且对每种类型都有许多增强功能。 + +### Koordinator scheduler vs kube-scheduler + +Koordinator 调度器并非旨在取代 kube-scheduler,而是为了让混部的工作负载在 kubernetes 上运行得 **更好**。 + +Koordinator 调度器是基于 schedule-framework 开发的,在原生调度能力之上增加了与混部和优先级抢占相关的调度插件。Koordinator 将致力于推动相关的增强进入 Kubernetes 的上游社区,推动混部技术的标准化。 + + +## 接下来 + +推荐后续步骤: + +- 开始 [安装 Koordinator ](./installation). +- 学习 Koordinator 的 [架构](architecture/overview). + + diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/colocation-profile.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/colocation-profile.md new file mode 100644 index 000000000..6ae88d637 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/colocation-profile.md @@ -0,0 +1,136 @@ +--- +sidebar_position: 1 +--- + +# Colocation Profile + +## Motivation + +如果现有集群中的工作负载想要通过 Koordinator 进行混合部署,则需要修改现有的 Controller/Operator 以支持 Koordinator 定义的 QoS Class、优先级和资源模型等协议。为了降低 Koordinator 混部系统的使用门槛,让大家可以简单快速的使用混部技术获得收益,因此 Koordinator 提供了一个 `ClusterColocationProfile` CRD 和 对应的 Webhook 修改和验证新创建的 Pod,注入 `ClusterColocationProfile` 中描述的字段。 + + +## 构架 + +![image](/img/clustercolocationprofile-arch.png) + +## Feature Gates + +ClusterColocationProfile mutating/validating 功能默认是打开的,如果想要关闭,请设置 Feature Gates: + +```bash +$ helm install koordinator https://... 
--set featureGates="PodMutatingWebhook=false\,PodValidatingWebhook=false" +``` + + +## 规格定义 + +如果您对 Kubernetes 资源不熟悉,请参考页面 [了解 Kubernetes 对象](https://kubernetes.io/docs/concepts/overview/working-with-objects/kubernetes-objects/)。 + +- **namespaceSelector**: 如果命名空间与选择器匹配,则决定是否改变/验证 Pod。 LabelSelector 默认为空,它将匹配所有 Namespace。 + +- **selector**: 如果 Pod 与选择器匹配,则决定是否改变/验证 Pod。 默认为空的 LabelSelector,它将匹配所有 Pod。 + +- **qosClass** (*required*): 描述了 Pod 的 Koordinator QoSClass。该值以标签 `koordinator.sh/qosClass` 的形式更新到 Pod 中。对应的选项为 `LSE`、`LSR`、`LS`、`BE` 和 `SYSTEM`。 有关更多信息,请查看页面[此处](../architecture/qos)。 + +- **priorityClassName** (*required*): 指定要写入到 Pod.Spec.PriorityClassName 中的 Kubenretes PriorityClass. 选项为 `koord-prod`、`koord-mid`、`koord-batch` 和 `koord-free`。有关更多信息,请查看 [此处](../architecture/priority)。 + +- **koordinatorPriority**: Koordinator 还提供了 Pod 级别的子优先级 sub-priority。 优先级值将作为标签 `koordinator.sh/priority` 更新到 Pod。 各个 Koordinator 组件通过 KoordinatorPriority 和 PriorityClassName 中的优先级值来确定 Koordinator 中 Pod 的优先级,值越高,优先级越高。 + +- **labels**: 描述需要注入到 `Pod.Labels` 的 k/v 键值对。 + +- **annotations**: 描述了需要注入到 `Pod.Annotations` 的 k/v 键值对。 + +- **schedulerName**: 如果指定,则 Pod 将由指定的调度器调度。 + +- **patch**: 表示用户想要注入 Pod 的 Pod 模板补丁。 + + +## 例子 + +### 创建 ClusterColocationProfile + +下面的 `profile.yaml` 文件描述了对所有含有标签 `koordinator.sh/enable-colocation=true` 的 Namespace 下的所有含有标签 `koordinator.sh/enable-colocation=true` 的 Pod 进行修改,注入 Koordinator QoSClass、Koordinator Priority 等。 + +```yaml +apiVersion: config.koordinator.sh/v1alpha1 +kind: ClusterColocationProfile +metadata: + name: colocation-profile-example +spec: + namespaceSelector: + matchLabels: + koordinator.sh/enable-colocation: "true" + selector: + matchLabels: + koordinator.sh/enable-colocation: "true" + qosClass: BE + priorityClassName: koord-batch + koordinatorPriority: 1000 + schedulerName: koord-scheduler + labels: + koordinator.sh/mutated: "true" + annotations: + koordinator.sh/intercepted: "true" + patch: + spec: + terminationGracePeriodSeconds: 30 +``` + +基于 YAML 文件创建 ClusterColocationProfile: + +```bash +$ kubectl apply -f profile.yaml +``` + +### 验证 ClusterColocationProfile 是否生效 + +```yaml +apiVersion: v1 +kind: Pod +metadata: + labels: + koordinator.sh/enable-colocation: "true" + name: test-pod +spec: + containers: + - name: app + image: nginx:1.15.1 + resources: + limits: + cpu: "1" + memory: "3456Mi" + requests: + cpu: "1" + memory: "3456Mi" +``` + +创建这个 Pod,现在你会发现该 Pod 被注入了 Koordinator QoSClass、Koordinator Priority 等。 + +```bash +$ kubectl get pod test-pod -o yaml +apiVersion: v1 +kind: Pod +metadata: + annotations: + koordinator.sh/intercepted: true + labels: + koordinator.sh/qosClass: BE + koordinator.sh/priority: 1000 + koordinator.sh/mutated: true + ... 
+spec: + terminationGracePeriodSeconds: 30 + priority: 5000 + priorityClassName: koord-batch + schedulerName: koord-scheduler + containers: + - name: app + image: nginx:1.15.1 + resources: + limits: + kubernetes.io/batch-cpu: "1000" + kubernetes.io/batch-memory: 3456Mi + requests: + kubernetes.io/batch-cpu: "1000" + kubernetes.io/batch-memory: 3456Mi +``` \ No newline at end of file diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/cpu-burst.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/cpu-burst.md new file mode 100644 index 000000000..315ab8661 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/cpu-burst.md @@ -0,0 +1,197 @@ +# CPU Burst + +## Introduction + +CPU Burst is a service level objective (SLO)-aware resource scheduling feature provided by Koordinator. You can use CPU Burst to improve the performance of latency-sensitive applications. CPU scheduling for a container may be throttled by the kernel due to the CPU limit, which downgrades the performance of the application. The koordlet component automatically detects CPU throttling events and automatically adjusts the CPU limit to a proper value. This greatly improves the performance of latency-sensitive applications. + +### How CPU Burst works + +Kubernetes allows you to specify CPU limits, which can be reused based on time-sharing. If you specify a CPU limit for a container, the OS limits the amount of CPU resources that can be used by the container within a specific time period. For example, you set the CPU limit of a container to 2. The OS kernel limits the CPU time slices that the container can use to 200 milliseconds within each 100-millisecond period. + +CPU utilization is a key metric that is used to evaluate the performance of a container. In most cases, the CPU limit is specified based on CPU utilization. CPU utilization on a per-millisecond basis shows more spikes than on a per-second basis. If the CPU utilization of a container reaches the limit within a 100-millisecond period, CPU throttling is enforced by the OS kernel and threads in the container are suspended for the rest of the time period, as shown in the following figure. + +![image](/img/cpu-throttles.png) + +The following figure shows the thread allocation of a web application container that runs on a node with four vCPUs. The CPU limit of the container is set to 2. The overall CPU utilization within the last second is low. However, Thread 2 cannot be resumed until the third 100-millisecond period starts because CPU throttling is enforced somewhere in the second 100-millisecond period. This increases the response time (RT) and causes long-tail latency problems in containers. + +![image](/img/cpu-throttles-1.png) + +Upstream Linux kernel >=5.14 and Anolis OS both provide [Burstable CFS Controller](https://github.com/torvalds/linux/commit/f4183717b370ad28dd0c0d74760142b20e6e7931#diff-cc1a82129952a910fdc4292448c2a097a2ba538bebefcf3c06381e45639ae73e), namely *CPU Burst* feature. It allows a container to accumulate CPU time slices when the container is idle. The container can use the accumulated CPU time slices to burst above the CPU limit when CPU utilization spikes. This improves performance and reduces the RT of the container. + +![image](/img/cpu-throttles-2.png) + +For kernel versions that do not support CPU Burst, koordlet detects CPU throttling events and dynamically adjusts the CPU limit to achieve the same effect as CPU Burst. 
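+
+As a concrete illustration of the quota arithmetic above, the hypothetical pod below sets a CPU limit of 2, which the kernel translates into a CFS quota of 200 milliseconds per 100-millisecond period (cgroup v1 file names: `cpu.cfs_quota_us=200000`, `cpu.cfs_period_us=100000`). Once the container consumes that budget within a period, its threads are throttled until the next period starts, which is exactly the latency effect that CPU Burst mitigates:
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: cfs-quota-example       # illustrative example, not required by Koordinator
+spec:
+  containers:
+  - name: app
+    image: nginx:1.15.1
+    resources:
+      requests:
+        cpu: "2"
+      limits:
+        cpu: "2"                # -> cfs_quota_us=200000 within each cfs_period_us=100000 window
+```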
+ +For more information about CPU Burst, see the presentation at KubeCon 2021: [CPU Burst: Getting Rid of Unnecessary Throttling, Achieving High CPU Utilization and Application Performance at the Same Time](https://kccncosschn21.sched.com/event/pcdF?spm=a2c63.p38356.0.0.2ec3731dhQbCIe&iframe=no). + +## Setup + +### Prerequisite + +- Kubernetes >= 1.18 +- Koordinator >= 0.3 + +### Installation + +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to +[Installation](/docs/installation). + +### Configurations + +Koordlet has already enabled CPU Burst feature (`-feature-gates=AllAlpha=true`). If not, please enable it manually by updating the feature gate in the koordlet daemonset. + +NOTE: CPU Burst is not available for `LSR` and `BE` pods since it targets on burstable cpu usages. + +```yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: koordlet +spec: + selector: + matchLabels: + koord-app: koordlet + template: + metadata: + labels: + koord-app: koordlet + spec: + containers: + - command: + - /koordlet + args: + - -CgroupRootDir=/host-cgroup/ + - -feature-gates=XXXX,CPUBurst=true # enable CPU Burst feature + ... +``` + +## Use CPU Burst + +### Use an annotation to enable CPU Burst for the pod + +Add the following annotation to the pod configuration to enable CPU Burst: + +```yaml +apiVersion: apps/v1 +kind: Pod +metadata: + name: demo-pod-xxx + annotations: + # Set the value to auto to enable CPU Burst for the pod. + koordinator.sh/cpuBurst: '{"policy": "auto"}' + # To disable CPU Burst for the pod, set the value to none. + #koordinator.sh/cpuBurst: '{"policy": "none"}' +``` + +### Use a ConfigMap to enable CPU Burst for all pods in a cluster + +Modify the slo-controller-config ConfigMap based on the following content to enable CPU Burst for all pods in a cluster: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: koordinator-system +data: + cpu-burst-config: '{"clusterStrategy": {"policy": "auto"}}' + #cpu-burst-config: '{"clusterStrategy": {"policy": "cpuBurstOnly"}}' + #cpu-burst-config: '{"clusterStrategy": {"policy": "none"}}' +``` + +### (Optional) Advanced Settings + +The following code block shows the pod annotations and ConfigMap fields that you can use for advanced configurations: + +```yaml +# Example of the slo-controller-config ConfigMap. +data: + cpu-burst-config: | + { + "clusterStrategy": { + "policy": "auto", + "cpuBurstPercent": 1000, + "cfsQuotaBurstPercent": 300, + "sharePoolThresholdPercent": 50, + "cfsQuotaBurstPeriodSeconds": -1 + } + } + + # Example of pod annotations. + koordinator.sh/cpuBurst: '{"policy": "auto", "cpuBurstPercent": 1000, "cfsQuotaBurstPercent": 300, "cfsQuotaBurstPeriodSeconds": -1}' +``` + +The following table describes the ConfigMap fields that you can use for advanced configurations of CPU Burst. + +| Field | Data type | Description | +| ---------------------------- | ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| policy | string |
  • none: disables CPU Burst. If you set the value to none, the related fields are reset to their original values. This is the default value.
  • cpuBurstOnly: enables only the kernel-level CPU Burst feature, which is available on Anolis OS or an upstream Linux kernel >= 5.14.
  • cfsQuotaBurstOnly: enables only the automatic CFS quota adjustment, which works on general kernel versions that do not support the kernel-level CPU Burst feature.
  • auto: enables CPU Burst and all the related features.
| +| cpuBurstPercent | int | Default value:`1000`. Unit: %. This field specifies the percentage to which the CPU limit can be increased by CPU Burst. If the CPU limit is set to `1`, CPU Burst can increase the limit to 10 by default. | +| cfsQuotaBurstPercent | int | Default value:`300`. Unit: %. This field specifies the maximum percentage to which the value of cfs_quota in the cgroup parameters can be increased. By default, the value of cfs_quota can be increased to at most three times. | +| cfsQuotaBurstPeriodSeconds | int | Default value:`-1`. Unit: seconds. This indicates that the time period in which the container can run with an increased CFS quota is unlimited. This field specifies the time period in which the container can run with an increased CFS quota, which cannot exceed the upper limit specified by `cfsQuotaBurstPercent`. | +| sharePoolThresholdPercent | int | Default value:`50`. Unit: %. This field specifies the CPU utilization threshold of the node. If the CPU utilization of the node exceeds the threshold, the value of cfs_quota in cgroup parameters is reset to the original value. | + +### Verify CPU Burst + +1. Use the following YAML template to create an apache-demo.yaml file. + +> To enable CPU Burst for a pod, specify an annotation in the annotations parameter of the metadata section of the pod configuration. + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: apache-demo + annotations: + koordinator.sh/cpuBurst: '{"policy": "auto"}' # Use this annotation to enable or disable CPU Burst. +spec: + containers: + - command: + - httpd + - -D + - FOREGROUND + image: koordinatorsh/apache-2-4-51-for-slo-test:v0.1 + imagePullPolicy: Always + name: apache + resources: + limits: + cpu: "4" + memory: 10Gi + requests: + cpu: "4" + memory: 10Gi + nodeName: # $nodeName Set the value to the name of the node that you use. + hostNetwork: False + restartPolicy: Never + schedulerName: default-scheduler +``` + +2. Run the following command to create an application by using Apache HTTP Server. + +```bash +kubectl apply -f apache-demo.yaml +``` + +3. Use the wrk2 tool to perform stress tests. + +```bash +# Download, decompress, and then install the wrk2 package. +# The Gzip module is enabled in the configuration of the Apache application. The Gzip module is used to simulate the logic of processing requests on the server. +# Run the following command to send requests. Replace the IP address in the command with the IP address of the application. +./wrk -H "Accept-Encoding: deflate, gzip" -t 2 -c 12 -d 120 --latency --timeout 2s -R 24 http://$target_ip_address:8010/static/file.1m.test +``` + +4. Check the results of CPU Burst enabled and disabled. + +e.g. We may have the following results: + +| CentOS 7 | Disabled | Enabled | +| ----------------------------- | ----------- | ------------------- | +| apache RT-p99 | 111.69 ms | 71.30 ms (-36.2%) | +| CPU Throttled Ratio | 33% | 0% | +| Average pod CPU utilization | 32.5% | 33.8% | + +The preceding metrics indicate the following information: + +- After CPU Burst is enabled, the P99 latency of apache is greatly reduced. +- After CPU Burst is enabled, CPU throttling is stopped and the average pod CPU utilization remains approximately at the same value. 
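Besides the application-level metrics above, you can also observe the throttling counters directly from the pod cgroup on the node while the stress test is running. The path below is only a sketch that assumes cgroup v1 with the systemd cgroup driver; replace the pod UID with the one from your environment.

```bash
# nr_periods / nr_throttled / throttled_time are cumulative CFS counters.
# Sample the file twice a few seconds apart: with CPU Burst disabled, nr_throttled keeps
# growing under load; with CPU Burst enabled, it should stay almost unchanged.
cat /sys/fs/cgroup/cpu/kubepods.slice/kubepods-pod<pod-uid>.slice/cpu.stat
```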
diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/cpu-evict.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/cpu-evict.md new file mode 100644 index 000000000..ed22cf1cc --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/cpu-evict.md @@ -0,0 +1,137 @@ +# 基于CPU资源满足度的驱逐策略 + +## 简介 + +Koordinator提供了CPU的[动态压制能力](/docs/user-manuals/cpu-suppress),在混部场景下可以根据高优先级Pod(LS)的资源用量情况, +动态调整低优先级Pod(BE)可以使用的CPU资源上限,当LS Pod的资源用量上升时,koordlet将缩减BE Pod可使用的CPU核心。然而,当LS Pod负载突增时, +可能会导致大量BE Pod被压制在少量CPU上,使得这部分Pod的资源满足度较低,应用运行及其缓慢,甚至额外引入一些内核资源的竞争。 + +事实上,大部分BE Pod的离线任务都有较好的重试能力,可以接受一定程度的驱逐而换取更高的资源质量。Koordlet提供了基于CPU资源满足度的驱逐策略, +计算被压制部分的CPU利用率和资源满足度,当利用率和资源满足度同时超过配置的阈值时,会依次按更低优先级、更高的Pod CPU利用率对BE Pod进行驱逐, +直至CPU资源满足度恢复到阈值以上。 + +![image](/img/cpu-evict.svg) + +## 使用限制 +请确保Koordinator已正确安装在你的集群中。若未安装,请参考[安装文档](https://koordinator.sh/docs/installation)。 +该功能需开启Batch资源动态超卖,并和CPU动态压制能力配合使用,请参考[使用文档](/docs/user-manuals/cpu-suppress)。所需的版本要求情况如下: + +| 组件 | 版本要求 | +| --- | ------- | +| Kubernetes | ≥v1.18 | +| koordinator | ≥v0.4.0 | + +该功能由单机组件Koordlet提供,对应的feature-gate默认关闭,使用前请确保koordlet的启动参数`-feature-gates`中已经添加了`BECPUEvict=true`, +详见[参考示例](https://github.com/koordinator-sh/charts/blob/main/versions/v1.2.0/templates/koordlet.yaml#L36)。 + +## 操作步骤 + +1. 使用以下ConfigMap,创建configmap.yaml文件 + ```yaml + #ConfigMap slo-controller-config 样例。 + apiVersion: v1 + kind: ConfigMap + metadata: + name: slo-controller-config # 以koord-manager实际配置的名字为准,例如ack-slo-config + namespace: koordinator-system # 命名空间以环境中实际安装的情况为准,例如kube-system + data: + # 开启基于CPU资源满足度的驱逐功能。 + resource-threshold-config: | + { + "clusterStrategy": { + "enable": true, + "cpuEvictBESatisfactionLowerPercent": 60, + "cpuEvictBESatisfactionUpperPercent": 80, + "cpuEvictBEUsageThresholdPercent": 90, + "CPUEvictTimeWindowSeconds": 60 + } + } + ``` + + | 参数 | 类型 | 取值范围 | 说明 | + | :-------------- | :------ | :-------- | :----------------------------------------------------------- | + | `enable` | Boolean | true; false | true:集群全局开启CPU资源满足度的驱逐策略。false(默认值):集群全局关闭策略。 | + | `cpuEvictBESatisfactionLowerPercent` | Int | 0~60 | BE CPU资源满足度的驱逐阈值,低于该值时将触发对BE Pod的驱逐。 | + | `cpuEvictBESatisfactionUpperPercent` | Int | cpuEvictBESatisfactionLowerPercent~100 | BE CPU资源满足度的安全阈值,高于该值时将停止对BE Pod的驱逐。 | + | `cpuEvictBEUsageThresholdPercent` | Int | 0~100 | BE CPU利用率阈值,当BE Pod在CPU被压制范围内的利用率高于该值时,才会触发驱逐,默认值为90。 | + | `cpuEvictTimeWindowSeconds` | Int | >=2 | CPU资源满足度和BE CPU利用率计算的时间窗口,单位为秒 | + +2. 查看安装的命名空间下是否存在ConfigMap,以命名空间`koordinator-system`和ConfigMap名字`slo-controller-config`为例,具体以实际安装配置为准。 + + - 若存在ConfigMap `slo-controller-config`,请使用PATCH方式进行更新,避免干扰ConfigMap中其他配置项。 + + ```bash + kubectl patch cm -n koordinator-system slo-controller-config --patch "$(cat configmap.yaml)" + ``` + + - 若不存在ConfigMap `slo-controller-config`,请执行以下命令进行创建ConfigMap。 + + ```bash + kubectl apply -f configmap.yaml + ``` + +3. 
使用以下YAML内容,创建be-pod-demo.yaml文件。 + + ```yaml + apiVersion: v1 + kind: Pod + metadata: + name: be-pod-demo + labels: + koordinator.sh/qosClass: 'BE' #指定Pod的QoS级别为BE。 + spec: + containers: + - args: + - '-c' + - '4' + - '--vm' + - '1' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: + limits: + kubernetes.io/batch-cpu: 4k + kubernetes.io/batch-memory: 4Gi + requests: + kubernetes.io/batch-cpu: 4k + kubernetes.io/batch-memory: 4Gi + restartPolicy: Always + schedulerName: default-scheduler + # 当ColocationProfile功能开启时(默认启用),priorityClassName是必填的 + priorityClassName: koord-batch + ``` + +4. 执行以下命令,将be-pod-demo部署到集群。 + + ```bash + $ kubectl apply -f be-pod-demo.yaml + ``` + +5. 执行以下命令,查看be-pod-demo状态,等待Pod启动完成。 + + ```bash + $ kubectl get pod be-pod-demo + NAME READY STATUS RESTARTS AGE + be-pod-demo 1/1 Running 0 7s + ``` + +6. 在节点执行以下命令,使用[stress工具](https://linux.die.net/man/1/stress)启动进程, +确保整机内存资源用量被提升到驱逐水位以上,其中`--cpu`参数表示stress进程占用的CPU资源量10核,测试时可根据实际机型情况进行调整。 + + ```bash + $ stress --cpu 10 --vm 1 + ``` +7. 观察be-pod-demo运行情况,可以发现be-pod-demo已经不存在,驱逐信息可以通过event查看到。 + + ```bash + $ kubectl get pod be-pod-demo + Error from server (NotFound): pods "be-pod-demo" not found + + $ kubectl get event + LAST SEEN TYPE REASON OBJECT MESSAGE + 44s Normal Killing pod/be-pod-demo Stopping container stress + 44s Warning evictPodSuccess ${your-pod-object} evict Pod:be-pod-demo, reason: EvictPodByBECPUSatisfaction, message: killAndEvictBEPodsRelease for node(${your-node-id}), need realase CPU : 1200 + ``` diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/cpu-qos.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/cpu-qos.md new file mode 100644 index 000000000..e0309b4ee --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/cpu-qos.md @@ -0,0 +1,189 @@ +# CPU QoS + +## 简介 + +Kubernetes支持将多种类型的应用以容器化的方式部署在同一台宿主机上运行,不同优先级的应用可能会竞争CPU资源,导致应用服务受损。Koordinator支持基于容器的QoS等级,优先保障高优先级应用的CPU性能。本文介绍如何使用容器CPU QoS功能。 + +## 背景 + +为了充分利用机器中的资源,通常会将高优先延迟敏感性LS(Latency-Sensitive)和低优先级BE(Best-Effort)的任务部署在同一台机器上,导致两种不同优先级任务之间存在资源竞争问题。Kubernetes根据应用的CPU Request/Limit,为容器设置物理资源限制,但仍存在容器间对CPU资源的竞争。例如,BE应用和LS应用共享物理核或逻辑核时,当BE应用负载较高时,会干扰LS应用的运行,导致服务响应延迟变高。 + +为了提高LS应用使用CPU资源的稳定性,降低BE应用的干扰,Koordinator基于Alibaba Cloud Linux 2和Anolis OS,提供了容器CPU QoS功能。Koordinator基于Group Identity提供的Linux调度优先级,差异化保障不同优先级应用的CPU调度,将LS应用标识为高优,BE应用标识为低优,在混合部署场景中有效改善LS应用的服务质量。更多信息,请参见[Group Identity功能说明](https://help.aliyun.com/document_detail/338407.htm#task-2129392)。 + +通过启用CPU QoS功能,您可以获取以下功能特性: + +- LS应用的任务唤醒延迟最小化。 +- BE应用的任务唤醒不会对LS容器造成性能影响。 +- BE应用的任务不会通过同时多线程SMT(Simultaneous MultiThreading)调度器共享物理核而对LS应用造成性能影响。 + +## 设置 + +### 前提条件 + +- Kubernetes >= 1.18 + +- Koordinator >= 0.4 + +- 操作系统: + + - Alibaba Cloud Linux 2(版本号详情,请参见[Group Identity功能说明](https://help.aliyun.com/document_detail/338407.htm#task-2129392)) + + - Anolis OS >= 8.6 + - CentOS 7.9 (需要安装龙蜥社区的 CPU 混部调度器插件,请参阅[最佳实践](../best-practices/anolis_plugsched.md)) + +### 安装 + +请确保Koordinator组件已正确安装在你的集群中。如果没有,请参考[安装文档](https://koordinator.sh/docs/installation)。 + +## 使用CPU QoS + +1. 
使用以下ConfigMap,创建configmap.yaml文件。 + + ```yaml + #ConfigMap slo-controller-config 样例。 + apiVersion: v1 + kind: ConfigMap + metadata: + name: slo-controller-config + namespace: koordinator-system + data: + #开启容器CPU QoS功能。 + resource-qos-config: | + { + "clusterStrategy": { + "lsClass": { + "cpuQOS": { + "enable": true, + "groupIdentity": 2 + } + }, + "beClass": { + "cpuQOS": { + "enable": true, + "groupIdentity": -1 + } + } + } + } + ``` + + `lsClass`、`beClass`分别用于配置QoS等级为LS、BE的Pod,`cpuQOS`用于配置容器CPU QoS功能。关键参数说明如下: + +| 参数 | 类型 | 取值范围 | 说明 | +| :-------------- | :------ | :-------- | :----------------------------------------------------------- | +| `enable` | Boolean | truefalse | true:集群全局开启容器CPU QoS功能。false:集群全局关闭容器CPU QoS功能。 | +| `groupIdentity` | Int | -1~2 | 表示CPU Group Identity的优先级。默认值依据QoS,LS对应2,BE对应-1。0表示关闭。`groupIdentity`值越大,表示容器在内核调度的优先级越高。例如,按默认配置,QoS等级为LS的容器Group Identity接口配置为`cpu.bvt_warp_ns=2`,BE容器配置为`cpu.bvt_warp_ns=-1`。更多信息,请参见[Group Identity功能说明](https://help.aliyun.com/document_detail/338407.htm#task-2129392)。 | + + + **说明** 对于未指定`koordinator.sh/qosClass`的Pod,Koordinator将参考Pod原生的QoSClass来设置参数,其中Besteffort使用ConfigMap中BE的配置,其他QoSClass使用ConfigMap中LS的配置。 + +2. 查看命名空间`koordinator-system`下是否存在ConfigMap `slo-controller-config`。 + + - 若存在ConfigMap `slo-controller-config`,请使用PATCH方式进行更新,避免干扰ConfigMap中其他配置项。 + + ```bash + kubectl patch cm -n koordinator-system slo-controller-config --patch "$(cat configmap.yaml)" + ``` + + - 若不存在ConfigMap `slo-controller-config`,请执行以下命令进行创建Configmap。 + + ```bash + kubectl apply -f configmap.yaml + ``` + +3. 使用以下YAML内容,创建ls-pod-demo.yaml文件。 + + ```yaml + apiVersion: v1 + kind: Pod + metadata: + name: ls-pod-demo + labels: + koordinator.sh/qosClass: 'LS' #指定Pod的QoS级别为LS。 + spec: + containers: + - command: + - "nginx" + - "-g" + - "daemon off; worker_processes 4;" + image: docker.io/koordinatorsh/nginx:v1.18-koord-example + imagePullPolicy: Always + name: nginx + resources: + limits: + cpu: "4" + memory: 10Gi + requests: + cpu: "4" + memory: 10Gi + restartPolicy: Never + schedulerName: default-scheduler + ``` + +4. 执行以下命令,将ls-pod-demo部署到集群。 + + ```bash + kubectl apply -f ls-pod-demo.yaml + ``` + +5. 执行以下命令,在单机端的Cgroup分组中查看LS Pod的内核Group Identity生效情况。 + + ```bash + cat /sys/fs/cgroup/cpu/kubepods.slice/kubepods-pod1c20f2ad****.slice/cpu.bvt_warp_ns + ``` + + 预期输出: + + ```bash + #LS Pod的Group Identity优先级为2(高优)。 + 2 + ``` + +6. 使用以下YAML内容,创建be-pod-demo.yaml文件。 + + ```yaml + apiVersion: v1 + kind: Pod + metadata: + name: be-pod-demo + labels: + koordinator.sh/qosClass: 'BE' #指定Pod的QoS级别为BE。 + spec: + containers: + - args: + - '-c' + - '1' + - '--vm' + - '1' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + restartPolicy: Always + schedulerName: default-scheduler + # 当ColocationProfile功能开启时(默认启用),priorityClassName是必填的 + priorityClassName: koord-batch + ``` + +7. 执行以下命令,将be-pod-demo部署到集群。 + + ```bash + kubectl apply -f be-pod-demo.yaml + ``` + +8. 
执行以下命令,在单机端的Cgroup分组中查看BE Pod的内核Group Identity生效情况。 + + ```bash + cat /sys/fs/cgroup/cpu/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod4b6e96c8****.slice/cpu.bvt_warp_ns + ``` + + 预期输出: + + ```bash + #BE Pod的Group Identity优先级为-1(低优)。 + -1 + ``` + + 由预期输出得到,LS容器为Group Identity高优先级,BE容器为Group Identity低优先级,表示LS容器的CPU服务质量将被优先保障。 + diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/cpu-suppress.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/cpu-suppress.md new file mode 100644 index 000000000..7077acefd --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/cpu-suppress.md @@ -0,0 +1,103 @@ +# CPU Suppress + +## Introduction +In order to ensure the runtime quality of different workloads in co-located scenarios, Koordinator uses the CPU Suppress +mechanism provided by koordlet on the node side to suppress workloads of the Best Effort type when the load increases. +Or increase the resource quota for Best Effort type workloads when the load decreases. + +In the [Dynamic resource overcommitment model](/architecture/resource-model.md) that is provided by +Koordinator, the total amount of reclaimed resources dynamically changes based on the actual amount of resources used +by latency-sensitive (LS/LSR/LSE) pods. Reclaimed resources can be used by BE pods. You can use the dynamic resource +overcommitment feature to improve the resource utilization of a cluster by deploying both LS pods and BE pods in the +cluster. To ensure sufficient CPU resources for the LS pods on a node, you can use koordinator to limit the CPU +usage of the BE pods on the node. The elastic resource limit feature can maintain the resource utilization of a node +below a specified threshold and limit the amount of CPU resources that can be used by BE pods. This ensures the +stability of the containers on the node. + +CPU Threshold indicates the CPU utilization threshold of a node. Pod (LS).Usage indicates the CPU usage of LS pods. +CPU Restriction for BE indicates the CPU usage of BE pods. The amount of CPU resources that can be used by BE pods +is adjusted based on the increase or decrease of the CPU usage of LS pods. We recommend that you use the same value +for CPU Threshold and the reserved CPU watermark in the dynamic resource overcommitment model. +This ensures a consistent level of CPU resource utilization. + +![CPU-Suppress](/img/cpu-suppress-demo.svg) + +## Setup + +### Prerequisite + +- Kubernetes >= 1.18 +- Koordinator >= 0.6 + +### Installation + +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to +[Installation](/docs/installation). + +### Configurations +When installing through the helm chart, the ConfigMap slo-controller-config will be created in the koordinator-system +namespace, and the CPU Suppress mechanism is enabled by default. If it needs to be closed, refer to the configuration +below, and modify the configuration of the resource-threshold-config section to take effect. + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: {{ .Values.installation.namespace }} +data: + ... + resource-threshold-config: | + { + "clusterStrategy": { + "enable": true, + "cpuSuppressThresholdPercent": 65 + } + } +``` + +#### (Optional) Advanced Settings +Also, the `CPU Suppress` feature allows you to configure the CPU utilization threshold in a fine-grained manner. +The following table describes the parameters. 
+ +| Parameter | Data type | Valid value | Description | +| --------- | --------- | ----------- | ----------- | +| enable | Boolean | true; false | true: enables the elastic resource limit feature; false: disables the elastic resource limit feature. | +| cpuSuppressThresholdPercent | Int | 0~100 | The CPU utilization threshold of the node. Unit: %. Default value: 65. | + +## Use CPU Suppress + +1. Create a configmap.yaml file based on the following ConfigMap content: +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: koordinator-system +data: + # Enable the elastic resource limit feature. + resource-threshold-config: | + { + "clusterStrategy": { + "enable": true + } + } +``` + +2. Run the following command to update the ConfigMap. +To avoid changing other settings in the ConfigMap, we commend that you run the kubectl patch command to update the ConfigMap. + +```bash +kubectl patch cm -n koordinator-system slo-controller-config --patch "$(cat configmap.yaml)" +``` + +3. Run the following command to query the CPU cores that are allocated to the BE pods on the node: +```bash +cat /sys/fs/cgroup/cpuset/kubepods.slice/kubepods-besteffort.slice/cpuset.cpus +``` +Expected output: +```bash +10-25,35-51,62-77,87-103 +``` +The output shows that the following CPU cores are allocated to the BE pods on the node: 10-25, 35-51, 62-77, and 87-103, +which will be changed dynamically according to the load of latency-sensitve pods. \ No newline at end of file diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/fine-grained-cpu-orchestration.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/fine-grained-cpu-orchestration.md new file mode 100644 index 000000000..4aa55eafc --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/fine-grained-cpu-orchestration.md @@ -0,0 +1,251 @@ +# 精细化 CPU 编排 + +koord-scheduler 为了提升 CPU 密集型工作负载的性能提供了精细化 CPU 编排能力。 + +## Introduction + +越来越多的系统利用 CPU 和硬件加速器的组合来支持实时计算和高吞吐的并行计算。 许多应用程序都需要高性能环境,包括电信、科学计算、机器学习、金融服务和数据分析。 + +但是,Kubernetes 集群中的 Pod 在多种资源维度上都是共享的,存在相互干扰的问题。 CPU 资源的共享几乎是不可避免的,例如 SMT 线程(即逻辑处理器)共享同一个物理核,同一个芯片中的物理核共享同一个 L3 缓存。 资源竞争会减慢这些对 CPU 敏感的工作负载的运行质量,从而导致延迟升高。 + +为了提高对 CPU 敏感的工作负载的性能,koord-scheduler 提供了一种精细化的 CPU 编排机制。 它增强了 Kubernetes 的 CPU 管理,并支持详细的 NUMA 局部性和 CPU 排除。 + +有关详细信息,请参阅[设计:细粒度 CPU 编排](/docs/designs/fine-grained-cpu-orchestration)。 + +## 设置 + +### 前置条件 + +- Kubernetes >= 1.18 +- Koordinator >= 0.6 + +### 安装 + +请确保 Koordinator 组件已正确安装在你的集群中。 如果没有,请参考[安装文档](/docs/installation)。 + +### 配置全局参数 + +精细化 CPU 编排能力是默认开启的。用户不需要额外的配置即可使用。 + +对于需要深入定制的用户,可以按需修改 Helm Chart 中的配置文件 `koord-scheduler-config` 设置精细化 CPU 编排的参数。 + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: koord-scheduler-config + ... +data: + koord-scheduler-config: | + apiVersion: kubescheduler.config.k8s.io/v1beta2 + kind: KubeSchedulerConfiguration + profiles: + - schedulerName: koord-scheduler + - pluginConfig: + - name: NodeNUMAResource + args: + apiVersion: kubescheduler.config.k8s.io/v1beta2 + kind: NodeNUMAResourceArgs + # The default CPU Binding Policy. The default is FullPCPUs + # If the Pod belongs to LSE/LSR Prod Pods, and if no specific CPU binding policy is set, + # the CPU will be allocated according to the default core binding policy. 
+ defaultCPUBindPolicy: FullPCPUs + # the scoring strategy + scoringStrategy: + # the scoring strategy ('MostAllocated', 'LeastAllocated') + # - MostAllocated(default): prefer the node with the least available resources + # - LeastAllocated: prefer the node with the most available resources + type: MostAllocated + # the weights of each resource type + resources: + - name: cpu + weight: 1 + plugins: + # enable the NodeNUMAResource plugin + preFilter: + enabled: + - name: NodeNUMAResource + filter: + enabled: + - name: NodeNUMAResource + ... + score: + enabled: + - name: NodeNUMAResource + weight: 1 + ... + reserve: + enabled: + - name: NodeNUMAResource + preBind: + enabled: + - name: NodeNUMAResource +``` + +koord-descheduler 是通过 Configmap 加载[调度器配置](https://kubernetes.io/docs/reference/scheduling/config/)的。因此需要通过重启调度器才能使用最新的配置。 + + +| 字段 | 说明 | 版本 | +|-------|-------------|---------| +| defaultCPUBindPolicy | 默认的 CPU 绑定策略。 默认值为 FullPCPUs。 如果 Pod 属于 LSE/LSR Prod Pod,并且没有设置具体的 CPU 绑定策略,CPU 则会按照默认的 CPU 绑定策略进行分配。 可选值为 FullPCPUs 和 SpreadByPCPUs | >= v0.6.0 | +| scoringStrategy | 打分策略,可选值为 MostAllocated 和 LeastAllocated | >= v0.6.0 | + +### 按节点配置 + +用户可以单独的为节点设置不同的 CPU 绑定策略和 NUMA Node 选择策略。 + +#### CPU 绑定策略 + +Label `node.koordinator.sh/cpu-bind-policy` 约束了调度时如何按照指定的策略分配和绑定CPU。具体的值定义如下: + +| 值 | 描述 | 版本 | +|-------|-------------|---------| +| None or empty | 不执行任何策略。 | >= v0.6.0 | +| FullPCPUsOnly | 要求调度器必须分配完整的物理核。等价于 kubelet CPU manager policy option full-pcpus-only=true. | >= v0.6.0 | +| SpreadByPCPUs | 要求调度器必须按照物理核维度均匀的分配逻辑核。 | >= v1.1.0 | + +如果节点 Label 上没有 `node.koordinator.sh/cpu-bind-policy`,调度器将会按照 Pod 指定的 CPU 绑定策略或者调度器配置的默认策略分配 CPU。 + +#### NUMA Node 选择策略 + +Label `node.koordinator.sh/numa-allocate-strategy` 表示调度时应该如何选择 NUMA Node。具体的值定义如下: + +| 值 | 描述 | 版本 | +|-------|-------------|---------| +| MostAllocated | MostAllocated 表示选择资源剩余最少的 NUMA Node。| >= v.0.6.0 | +| LeastAllocated | LeastAllocated 表示选择资源剩余最多的NUMA Node。| >= v.0.6.0 | + +如果 `node.koordinator.sh/numa-allocate-strategy` 和 `kubelet.koordinator.sh/cpu-manager-policy` 都设置了, 优先使用 `node.koordinator.sh/numa-allocate-strategy`。 + + +## 使用精细化 CPU 编排 + +1. 按照下面的 YAM了 创建 Deployment `nginx`。 + +> 使用精细化 CPU 编排时,Pod 需要在 Label 中指定具体的 [QoSClass](/docs/architecture/qos#definition) 并指定具体的绑定策略。 + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-lsr + labels: + app: nginx-lsr +spec: + replicas: 3 + selector: + matchLabels: + app: nginx-lsr + template: + metadata: + name: nginx-lsr + labels: + app: nginx-lsr + koordinator.sh/qosClass: LSR # set the QoS class as LSR, the binding policy is FullPCPUs by default + # in v0.5, binding policy should be specified. + # e.g. to set binding policy as FullPCPUs (prefer allocating full physical CPUs of the same core): + #annotations: + #scheduling.koordinator.sh/resource-spec: '{"preferredCPUBindPolicy": "FullPCPUs"}' + spec: + schedulerName: koord-scheduler # use the koord-scheduler + containers: + - name: nginx + image: nginx + resources: + limits: + cpu: '2' + requests: + cpu: '2' + priorityClassName: koord-prod +``` + +2. 创建 `nginx` deployment 并检查调度结果。 + +```bash +$ kubectl create -f nginx-deployment.yaml +deployment/nginx-lsr created +$ kubectl get pods -o wide | grep nginx +nginx-lsr-59cf487d4b-jwwjv 1/1 Running 0 21s 172.20.101.35 node-0 +nginx-lsr-59cf487d4b-4l7r4 1/1 Running 0 21s 172.20.101.79 node-1 +nginx-lsr-59cf487d4b-nrb7f 1/1 Running 0 21s 172.20.106.119 node-2 +``` + +3. 检查 Pod 的 CPU 分配结果 `scheduling.koordinator.sh/resource-status`. 
+ +```bash +$ kubectl get pod nginx-lsr-59cf487d4b-jwwjv -o jsonpath='{.metadata.annotations.scheduling\.koordinator\.sh/resource-status}' +{"cpuset":"2,54"} +``` + +我们可以看到 Pod `nginx-lsr-59cf487d4b-jwwjv` 绑定了 2 个逻辑核,对应的逻辑核 ID 分别是 2 和 54,这两个逻辑核属于同一个物理核。 + +4. 更改 `nginx` deployment 的 CPU 绑定策略。 + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-lsr + labels: + app: nginx-lsr +spec: + replicas: 3 + selector: + matchLabels: + app: nginx-lsr + template: + metadata: + name: nginx-lsr + labels: + app: nginx-lsr + koordinator.sh/qosClass: LSR # set the QoS class as LSR + annotations: + # set binding policy as SpreadByPCPUs (prefer allocating physical CPUs of different cores) + scheduling.koordinator.sh/resource-spec: '{"preferredCPUBindPolicy": "SpreadByPCPUs"}' + spec: + schedulerName: koord-scheduler # use the koord-scheduler + containers: + - name: nginx + image: nginx + resources: + limits: + cpu: '2' + requests: + cpu: '2' + priorityClassName: koord-prod +``` + +5. 更新 `nginx` deployment 并检查调度结果。 + +```bash +$ kubectl apply -f nginx-deployment.yaml +deployment/nginx-lsr created +$ kubectl get pods -o wide | grep nginx +nginx-lsr-7fcbcf89b4-rkrgg 1/1 Running 0 49s 172.20.101.35 node-0 +nginx-lsr-7fcbcf89b4-ndbks 1/1 Running 0 49s 172.20.101.79 node-1 +nginx-lsr-7fcbcf89b4-9v8b8 1/1 Running 0 49s 172.20.106.119 node-2 +``` + +6. 检查 Pod 最新的 CPU 分配结果 `scheduling.koordinator.sh/resource-status`。 + +```bash +$ kubectl get pod nginx-lsr-7fcbcf89b4-rkrgg -o jsonpath='{.metadata.annotations.scheduling\.koordinator\.sh/resource-status}' +{"cpuset":"2-3"} +``` + +现在我们可以看到 Pod `nginx-lsr-59cf487d4b-jwwjv` 绑定了两个逻辑核,对应的 ID 分别是 2,3, 属于两个不同的物理核。 + +7. (可选) 高级配置. + +```yaml + labels: + # koordinator QoS class of the pod. (use 'LSR' or 'LSE' for binding CPUs) + koordinator.sh/qosClass: LSR + annotations: + # `resource-spec` indicates the specification of resource scheduling, here we need to set `preferredCPUBindPolicy`. + # `preferredCPUBindPolicy` indicating the CPU binding policy of the pod ('None', 'FullPCPUs', 'SpreadByPCPUs') + # - None: perform no exclusive policy + # - FullPCPUs(default): a bin-packing binding policy, prefer allocating full physical cores (SMT siblings) + # - SpreadByPCPUs: a spread binding policy, prefer allocating logical cores (SMT threads) evenly across physical cores (SMT siblings) + scheduling.koordinator.sh/resource-spec: '{"preferredCPUBindPolicy": "FullPCPUs"}' +``` diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/fine-grained-device-scheduling.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/fine-grained-device-scheduling.md new file mode 100644 index 000000000..b4a93a337 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/fine-grained-device-scheduling.md @@ -0,0 +1,318 @@ +# Device Scheduling +We provide a fine-grained mechanism for managing GPUs and other devices such as RDMA and FPGA, defines a set of APIs to +describe device information on nodes, including GPU, RDMA, and FPGA, and a new set of resource names to flexibly support +users to apply at a finer granularity GPU resources. This mechanism is the basis for subsequent other GPU scheduling +capabilities such as GPU Share, GPU Overcommitment, etc. + +## Introduction +GPU devices have very strong computing power, but are expensive. How to make better use of GPU equipment, give full play +to the value of GPU and reduce costs is a problem that needs to be solved. 
In the existing GPU allocation mechanism of +the K8s community, the GPU is allocated by the kubelet, and it is a complete device allocation. This method is simple +and reliable, but similar to the CPU and memory, the GPU will also be wasted. Therefore, some users expect to use only +a portion of the GPU's resources and share the rest with other workloads to save costs. Moreover, GPU has particularities. +For example, the NVLink and oversold scenarios supported by NVIDIA GPU mentioned below both require a central decision +through the scheduler to obtain globally optimal allocation results. + +## Setup + +### Prerequisite + +- Kubernetes >= 1.18 +- Koordinator >= 0.71 + +### Installation + +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to [Installation](/docs/installation). + +### Configurations + +DeviceScheduling is *Enabled* by default. You can use it without any modification on the koord-scheduler config. + +## Use DeviceScheduling + +### Quick Start + +1.check device crd: + +```bash +$ kubectl get device host04 -o yaml +``` + +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: Device +metadata: + creationTimestamp: "2022-10-08T09:26:42Z" + generation: 1 + managedFields: + - apiVersion: scheduling.koordinator.sh/v1alpha1 + fieldsType: FieldsV1 + fieldsV1: + f:metadata: + f:ownerReferences: {} + f:spec: + .: {} + f:devices: {} + f:status: {} + manager: koordlet + operation: Update + time: "2022-10-08T09:26:42Z" + name: host04 + ownerReferences: + - apiVersion: v1 + blockOwnerDeletion: true + controller: true + kind: Node + name: host04 + uid: 09c4f912-6026-467a-85d2-6b2147c9557e + resourceVersion: "39011943" + selfLink: /apis/scheduling.koordinator.sh/v1alpha1/devices/host04 + uid: 5a498e1f-1357-4518-b74c-cab251d6c18c +spec: + devices: + - health: true + id: GPU-04cea5cd-966f-7116-1d58-1ac34421541b + minor: 0 + resources: + kubernetes.io/gpu-core: "100" + kubernetes.io/gpu-memory: 16Gi + kubernetes.io/gpu-memory-ratio: "100" + type: gpu + - health: true + id: GPU-3680858f-1753-371e-3c1a-7d8127fc7113 + minor: 1 + resources: + kubernetes.io/gpu-core: "100" + kubernetes.io/gpu-memory: 16Gi + kubernetes.io/gpu-memory-ratio: "100" + type: gpu +status: {} +``` +We can find this node has two gpu cards, we can find the detail info of each gpu card here. + +2.check node allocatable resource: + +```bash +$ kubectl get node host04 -o yaml +``` + +```yaml +apiVersion: v1 +kind: Node +metadata: + annotations: + flannel.alpha.coreos.com/backend-data: '{"VtepMAC":"5a:69:48:10:29:25"}' + creationTimestamp: "2022-08-29T09:12:55Z" + labels: + beta.kubernetes.io/os: linux + status: + addresses: + - address: 10.15.0.37 + type: InternalIP + - address: host04 + type: Hostname + allocatable: + cpu: "6" + ephemeral-storage: "200681483926" + kubernetes.io/gpu: "200" + kubernetes.io/gpu-core: "200" + kubernetes.io/gpu-memory: 32Gi + kubernetes.io/gpu-memory-ratio: "200" + memory: 59274552Ki + nvidia.com/gpu: "2" + pods: "220" + capacity: + cpu: "8" + kubernetes.io/gpu: "200" + kubernetes.io/gpu-core: "200" + kubernetes.io/gpu-memory: 32Gi + kubernetes.io/gpu-memory-ratio: "200" + memory: 61678904Ki + nvidia.com/gpu: "2" + pods: "220" +``` +We can find the node allocatable resource has merged each gpu card resource. 
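If you only want the GPU-related quantities instead of the full node object, a jsonpath query keeps the output short. This is just a convenience command, not a required step; `host04` is the example node used above, and the escaped-dot syntax is the same one used for reading annotations later in this document. The values shown correspond to the node status above.

```bash
$ kubectl get node host04 -o jsonpath='{.status.allocatable.kubernetes\.io/gpu}'
200
$ kubectl get node host04 -o jsonpath='{.status.allocatable.kubernetes\.io/gpu-memory}'
32Gi
```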
+ +3.apply pod: +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example + namespace: default +spec: + schedulerName: koord-scheduler + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: 40m + memory: 40Mi + requests: + cpu: 40m + memory: 40Mi + kubernetes.io/gpu: "100" + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always +``` + +```bash +$ kubectl get pod -n default pod-example -o yaml +``` + +```yaml +apiVersion: v1 +kind: Pod +metadata: + annotations: + scheduling.koordinator.sh/device-allocated: '{"gpu":[{"minor":0,"resources":{"kubernetes.io/gpu-core":"100","kubernetes.io/gpu-memory":"12508288Ki","kubernetes.io/gpu-memory-ratio":"100"}}]}' + creationTimestamp: "2022-10-08T09:33:07Z" + name: pod-example + namespace: default + resourceVersion: "39015044" + selfLink: /api/v1/namespaces/xlf/pods/gpu-pod7 + uid: 6bf1ac3c-0c9f-472a-8b86-de350bbfa795 +spec: + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: "1" + kubernetes.io/gpu: "100" + memory: 256Mi + requests: + cpu: "1" + kubernetes.io/gpu: "100" + memory: 256Mi +status: + conditions: + ... + hostIP: 10.0.0.149 + phase: Running + podIP: 10.244.2.45 + podIPs: + - ip: 10.244.2.45 + qosClass: Guaranteed + startTime: "2022-10-08T09:33:07Z" +``` +You can find the concrete device allocate result through annotation `scheduling.koordinator.sh/device-allocated`. + +4.more apply protocol: +```yaml +apiVersion: v1 +kind: Pod +... +spec: + ... + resources: + requests: + cpu: 40m + memory: 40Mi + nvidia.com/gpu: "100" +``` + +```yaml +apiVersion: v1 +kind: Pod +... +spec: + ... + resources: + requests: + cpu: 40m + memory: 40Mi + kubernetes.io/gpu-core: "100" + kubernetes.io/gpu-memory-ratio: "100" +``` + +```yaml +apiVersion: v1 +kind: Pod +... +spec: + ... 
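  # Compared with the previous example, requesting kubernetes.io/gpu-core together with
  # kubernetes.io/gpu-memory specifies GPU memory by absolute size instead of by
  # percentage (kubernetes.io/gpu-memory-ratio).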
+ resources: + requests: + cpu: 40m + memory: 40Mi + kubernetes.io/gpu-core: "100" + kubernetes.io/gpu-memory: "16Mi" +``` + +4.device resource debug api: +```bash +$ kubectl -n koordinator-system get lease koord-scheduler --no-headers | awk '{print $2}' | cut -d'_' -f1 | xargs -I {} kubectl -n koordinator-system get pod {} -o wide --no-headers | awk '{print $6}' + 10.244.0.64 + +$ curl 10.244.0.64:10251/apis/v1/plugins/DeviceShare/nodeDeviceSummaries +$ curl 10.244.0.64:10251/apis/v1/plugins/DeviceShare/nodeDeviceSummaries/host04 +``` + +```json +{ + "allocateSet": { + "gpu": { + "xlf/gpu-pod7": { + "0": { + "kubernetes.io/gpu-core": "100", + "kubernetes.io/gpu-memory": "12508288Ki", + "kubernetes.io/gpu-memory-ratio": "100" + } + } + } + }, + "deviceFree": { + "kubernetes.io/gpu-core": "0", + "kubernetes.io/gpu-memory": "0", + "kubernetes.io/gpu-memory-ratio": "0" + }, + "deviceFreeDetail": { + "gpu": { + "0": { + "kubernetes.io/gpu-core": "0", + "kubernetes.io/gpu-memory": "0", + "kubernetes.io/gpu-memory-ratio": "0" + } + } + }, + "deviceTotal": { + "kubernetes.io/gpu-core": "100", + "kubernetes.io/gpu-memory": "12508288Ki", + "kubernetes.io/gpu-memory-ratio": "100" + }, + "deviceTotalDetail": { + "gpu": { + "0": { + "kubernetes.io/gpu-core": "100", + "kubernetes.io/gpu-memory": "12508288Ki", + "kubernetes.io/gpu-memory-ratio": "100" + } + } + }, + "deviceUsed": { + "kubernetes.io/gpu-core": "100", + "kubernetes.io/gpu-memory": "12508288Ki", + "kubernetes.io/gpu-memory-ratio": "100" + }, + "deviceUsedDetail": { + "gpu": { + "0": { + "kubernetes.io/gpu-core": "100", + "kubernetes.io/gpu-memory": "12508288Ki", + "kubernetes.io/gpu-memory-ratio": "100" + } + } + } +} +``` diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/gang-scheduling.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/gang-scheduling.md new file mode 100644 index 000000000..b439200ef --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/gang-scheduling.md @@ -0,0 +1,364 @@ +# GangScheduling + +## 简介 +Koord-dscheduler 提供了 Gang Scheduling 满足 All-or-Nothing 调度需求。用户可以声明最小资源集合数,只有当已经完成调度资源数超过前面声明当前最小资源集合数才能触发节点绑定。 +同时提供 `Strict` 和 `NonStrict` 两个参数用于控制资源累积过程,区别于其他社区方案将提供 two-level Gang 描述用于更好匹配真实场景。 + +## 设置 + +### 前置条件 + +- Kubernetes >= 1.18 +- Koordinator >= 0.70 + +### 安装 + +请确保 Kubernetes 集群已经安装 Koordinator 组件,如果没有安装,请参阅 [安装](/docs/installation)。 + +### 配置 + +GangScheduling 特性默认*开启*,无需修改 koord-scheduler 配置进行开启。 + +## GangScheduling 使用手册 + +### 快速开始 + +#### Gang CRD 方式 + +1.创建 pod-group 资源 +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: PodGroup +metadata: + name: gang-example + namespace: default +spec: + scheduleTimeoutSeconds: 100 + minMember: 2 +``` + +```bash +$ kubectl get pgs -n default + NAME AGE + gang-example 13s +``` + +2.创建子资源 pod1 +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example1 + namespace: default + labels: + pod-group.scheduling.sigs.k8s.io: gang-example +spec: + schedulerName: koord-scheduler + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: 40m + memory: 40Mi + requests: + cpu: 40m + memory: 40Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always +``` + +```bash +$ kubectl get pod -n default + NAME READY STATUS RESTARTS AGE + pod-example1 0/1 Pending 0 7s +``` + +3.创建子资源 pod2 +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: 
pod-example2 + namespace: default + labels: + pod-group.scheduling.sigs.k8s.io: gang-example +spec: + schedulerName: koord-scheduler + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: 40m + memory: 40Mi + requests: + cpu: 40m + memory: 40Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always +``` + +```bash +$ kubectl get pod -n default + NAME READY STATUS RESTARTS AGE + pod-example1 1/1 Running 0 53s + pod-example2 1/1 Running 0 5s +``` + +```bash +$ kubectl get pg gang-example -n default -o yaml +``` + +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: PodGroup +metadata: + creationTimestamp: "2022-10-09T09:08:17Z" + generation: 6 +spec: + minMember: 1 + scheduleTimeoutSeconds: 100 +status: + phase: Running + running: 2 + scheduled: 2 +``` + +#### Pod Annotaion 方式 +1.创建子资源 pod1 +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example1 + namespace: default + annotations: + gang.scheduling.koordinator.sh/name: "gang-example" + gang.scheduling.koordinator.sh/min-available: "2" +spec: + schedulerName: koord-scheduler + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: 40m + memory: 40Mi + requests: + cpu: 40m + memory: 40Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always +``` + +```bash +$ kubectl get pod -n default + NAME READY STATUS RESTARTS AGE + pod-example1 0/1 Pending 0 7s +``` + +2.创建子资源 pod2 +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example2 + namespace: default + annotations: + gang.scheduling.koordinator.sh/name: "gang-example" + gang.scheduling.koordinator.sh/min-available: "2" +spec: + schedulerName: koord-scheduler + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: 40m + memory: 40Mi + requests: + cpu: 40m + memory: 40Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always +``` + +```bash +$ kubectl get pod -n default + NAME READY STATUS RESTARTS AGE + pod-example1 1/1 Running 0 53s + pod-example2 1/1 Running 0 5s +``` + +```bash +$ kubectl get pg gang-example -n default -o yaml +``` + +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: PodGroup +metadata: + creationTimestamp: "2022-10-09T09:08:17Z" + generation: 6 +spec: + minMember: 1 + scheduleTimeoutSeconds: 100 +status: + phase: Running + running: 2 + scheduled: 2 +``` + +#### Gang 调度调试接口: +```bash +$ kubectl -n koordinator-system get lease koord-scheduler --no-headers | awk '{print $2}' | cut -d'_' -f1 | xargs -I {} kubectl -n koordinator-system get pod {} -o wide --no-headers | awk '{print $6}' + 10.244.0.64 + +$ curl 10.244.0.64:10251/apis/v1/plugins/Coscheduling/gang/default/gang-example +``` + +```json +{ + "boundChildren": { + "default/pod-example1": {}, + "default/pod-example2": {} + }, + "children": { + "default/pod-example1": {}, + "default/pod-example2": {} + }, + "childrenScheduleRoundMap": { + "default/pod-example1": 2, + "default/pod-example2": 2 + }, + "createTime": "2022-10-09T07:31:53Z", + "gangFrom": "GangFromPodAnnotation", + "gangGroup": null, + "hasGangInit": true, + "minRequiredNumber": 2, + "mode": "Strict", + "name": "default/gang-example", + "onceResourceSatisfied": true, + "scheduleCycle": 2, + "scheduleCycleValid": true, + 
"totalChildrenNum": 2, + "waitTime": 600000000000, + "waitingForBindChildren": {} +} +``` + +#### Gang 调度高级配置 +1.PodGroup Annotation 方式 + +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: PodGroup +metadata: + name: gang-example1 + namespace: default + annotations: + gang.scheduling.koordinator.sh/total-number: "3" + gang.scheduling.koordinator.sh/mode: "NonStrict" + gang.scheduling.koordinator.sh/groups: "[\"default/gang-example1\", \"default/gang-example2\"]" + +spec: + scheduleTimeoutSeconds: 100 + minMember: 2 + +``` + +- `gang.scheduling.koordinator.sh/total-number` 用于配置 gang 内子资源总数。如果未配置,则使用 `minMember` 配置。 +- `gang.scheduling.koordinator.sh/mode` 用于配置 Gang 调度失败处理策略。支持 `Strict\NonStrict` 两种模式,默认为 `Strict` 。 +- `gang.scheduling.koordinator.sh/groups` 用于配置支持多个 gang 为一组完成 Gang 调度,用于支持多个 gang 之间有依赖关系的场景。 + +2.Pod Annotation 方式 +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example2 + namespace: default + annotations: + gang.scheduling.koordinator.sh/name: "gang-example1" + gang.scheduling.koordinator.sh/min-available: "2" + gang.scheduling.koordinator.sh/total-number: "3" + gang.scheduling.koordinator.sh/mode: "Strict\NonStrict" + gang.scheduling.koordinator.sh/groups: "[\"default/gang-example1\", \"default/gang-example2\"]" + gang.scheduling.koordinator.sh/waiting-time: "100s" +spec: + schedulerName: koord-scheduler + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: 40m + memory: 40Mi + requests: + cpu: 40m + memory: 40Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always +``` + +- `gang.scheduling.koordinator.sh/total-number` 用于配置 gang 内子资源总数。如果未配置,则使用 `gang.scheduling.koordinator.sh/min-available` 配置。 +- `gang.scheduling.koordinator.sh/mode` 用于配置 Gang 调度失败处理策略。支持 `Strict\NonStrict` 两种模式,默认为 `Strict` 。 +- `gang.scheduling.koordinator.sh/groups` 用于配置支持多个 gang 为一组完成 Gang 调度,用于支持多个 gang 之间有依赖关系的场景。 +- `gang.scheduling.koordinator.sh/waiting-time` 用于配置自第一个 Pod 进入 Permit 阶段依赖的最大等待时间。 + +#### 调度器高级配置 +您可以在 helm 中修改 `koord-scheduler-config.yaml` 来调整 `Coscheduling` 配置,如下所示: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: koord-scheduler-config + namespace: {{ .Values.installation.namespace }} +data: + koord-scheduler-config: | + apiVersion: kubescheduler.config.k8s.io/v1beta2 + kind: KubeSchedulerConfiguration + leaderElection: + leaderElect: true + resourceLock: leases + resourceName: koord-scheduler + resourceNamespace: {{ .Values.installation.namespace }} + profiles: + - pluginConfig: + - name: Coscheduling + args: + apiVersion: kubescheduler.config.k8s.io/v1beta2 + kind: CoschedulingArgs` + defaultTimeout: 600s + controllerWorkers: 1 + - name: ElasticQuota + ... 
+``` + diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/load-aware-descheduling.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/load-aware-descheduling.md new file mode 100644 index 000000000..ea6247ce5 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/load-aware-descheduling.md @@ -0,0 +1,229 @@ +# 负载感知重调度 + + +调度器中支持的负载感知调度能够在调度时选择负载较低的节点运行新的Pod,但随着时间、集群环境变化以及工作负载面对的流量/请求的变化时,节点的利用率会动态的发生变化,集群内节点间原本负载均衡的情况被打破,甚至有可能出现极端负载不均衡的情况,影响到工作负载运行时质量。 + +koord-descheduler 感知集群内节点负载的变化,自动的优化超过负载水位安全阈值的节点,防止出现极端负载不均衡的情况。 + +## 简介 + +koord-descheduler 组件中 `LowNodeLoad` 插件负责感知负载水位完成热点打散重调度工作。`LowNodeLoad` 插件 与 Kubernetes 原生的 descheduler 的插件 LowNodeUtilization 不同的是,`LowNodeLoad` 是根据节点真实利用率的情况决策重调度,而 LowNodeUtilization 是根据资源分配率决策重调度。 + +LowNodeLoad插件有两个最重要的参数: +- `highThresholds` 表示负载水位的目标安全阈值,超过该阈值的节点上的 Pod 将参与重调度; +- `lowThresholds` 表示负载水位的空闲安全水位。低于该阈值的节点上的 Pod 不会被重调度。 + +以下图为例,`lowThresholds` 为45%,`highThresholds` 为 70%,我们可以把节点归为三类: + +1. 空闲节点(Idle Node)。资源利用率低于 45% 的节点; +2. 正常节点(Normal Node)。资源利用率高于 45% 但低于 70% 的节点,这个负载水位区间是我们期望的合理的区间范围 +3. 热点节点(Hotspot Node)。如果节点资源利用率高于70%,这个节点就会被判定为不安全了,属于热点节点,应该驱逐一部分 Pod,降低负载水位,使其不超过 70%。 + +![image](/img/low-node-load.png) + +在识别出哪些节点是热点后,koord-descheduler 将会执行迁移驱逐操作,驱逐热点节点中的部分 Pod 到空闲节点上。 + +如果一个集群中空闲节点的总数并不是很多时会终止重调度。这在大型集群中可能会有所帮助,在大型集群中,一些节点可能会经常或短时间使用不足。默认情况下,`numberOfNodes` 设置为零。可以通过设置参数 `numberOfNodes` 来开启该能力。 + +在迁移前,koord-descheduler 会计算出实际空闲容量,确保要迁移的 Pod 的实际利用率之和不超过集群内空闲总量。这些实际空闲容量来自于空闲节点,一个空闲节点实际空闲容量 = `(highThresholds - 节点当前负载) * 节点总容量`。假设节点 A 的负载水位是20%,highThresholdss是 70%,节点 A 的 CPU 总量为96C,那么 `(70%-20%) * 96 = 48C`,这 48C 就是可以承载的空闲容量了。 + +另外,在迁移热点节点时,会过滤筛选节点上的Pod,目前 koord-descheduler 支持多种筛选参数,可以避免迁移驱逐非常重要的 Pod: + +- 按 namespace 过滤。可以配置成只筛选某些 namespace 或者过滤掉某些 namespace +- 按 pod selector 过滤。可以通过 label selector 筛选出 Pod,或者排除掉具备某些 Label 的 Pod +- 配置 nodeFit 检查调度规则是否有备选节点。当开启后,koord-descheduler 根据备选 Pod 对应的 Node Affinity/Node Selector/Toleration ,检查集群内是否有与之匹配的 Node,如果没有的话,该 Pod 将不会去驱逐迁移。如果设置 `nodeFit` 为 false,此时完全由 koord-descheduler 底层的迁移控制器完成容量预留,确保有资源后开始迁移。 + +当筛选出 Pod 后,从 QoSClass、Priority、实际用量和创建时间等多个维度对这些 Pod 排序。 + +筛选 Pod 并完成排序后,开始执行迁移操作。迁移前会检查剩余空闲容量是否满足和当前节点的负载水位是否高于目标安全阈值,如果这两个条件中的一个不能满足,将停止重调度。每迁移一个 Pod 时,会预扣剩余空闲容量,同时也会调整当前节点的负载水位,直到剩余容量不足或者水位达到安全阈值。 + +## 设置 + +### 前置条件 + +- Kubernetes >= 1.18 +- Koordinator >= 1.1.1 + +### 安装 + +请确保 Koordinator 组件已正确安装在你的集群中。 如果没有,请参考[安装文档](/docs/installation)。 + +### 配置 + +负载感知重调度默认是禁用的。可以通过修改配置 ConfigMap `koord-descheduler-config` 启用该能力。 + +对于需要深入定制的用户,可以按照需要更改 Helm Chart 中的 ConfigMap `koord-descheduler-config` 设置参数。修改配置后需要重启 koord-descheduler 才能应用最新的配置。 + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: koord-descheduler-config + ... +data: + koord-descheduler-config: | + apiVersion: descheduler/v1alpha2 + kind: DeschedulerConfiguration + ... + # Execute the LowNodeLoad plugin every 60s + deschedulingInterval: 60s + profiles: + - name: koord-descheduler + plugins: + deschedule: + disabled: + - name: "*" + balance: + enabled: + - name: LowNodeLoad # Configure to enable the LowNodeLoad plugin + .... + pluginConfig: + - name: LowNodeLoad + args: + apiVersion: descheduler/v1alpha2 + kind: LowNodeLoadArgs + evictableNamespaces: + # include and exclude are mutually exclusive, only one of them can be configured. 
+ # include indicates that only the namespace configured below will be processed + # include: + # - test-namespace + # exclude means to only process namespaces other than those configured below + exclude: + - "kube-system" + - "koordinator-system" + # lowThresholds defines the low usage threshold of resources + lowThresholds: + cpu: 20 + memory: 30 + # highThresholds defines the target usage threshold of resources + highThresholds: + cpu: 50 + memory: 60 + .... +``` + +| 字段 | 说明 | 版本 | +|-------|-------------|--------| +| paused | Paused 控制 LowNodeLoad 插件是否工作. | >= v1.1.1 | +| dryRun | DryRun 表示只执行重调度逻辑,但不重复啊迁移/驱逐 Pod | >= v1.1.1 | +| numberOfNodes | NumberOfNodes 可以配置为仅当未充分利用的节点数高于配置值时才激活该策略。 这在大型集群中可能会有所帮助,在大型集群中,一些节点可能会经常或短时间使用不足。 默认情况下,NumberOfNodes 设置为零。 | >= v1.1.1 | +| evictableNamespaces | 可以参与重调度的Namespace。可以配置 include和exclude两种,但两种策略只能二选一。include 表示只处理指定的 namespace;exclude 表示只处理指定之外的namespace。| >= v1.1.1 | +| nodeSelector | 通过 label selector 机制选择目标节点。 | >= v1.1.1 | +| podSelectors | 通过 label selector 选择要处理的Pod。 | >= v1.1.1 | +| nodeFit | 表示是否按照备选要迁移的Pod中指定的 Node Affinity/Node Selector/Resource Requests/TaintToleration 判断是否有空闲节点。没有则不参与调度。默认开启。可以设置为 false 禁用该能力。 | >= v1.1.1 | +| useDeviationThresholds | 如果 useDeviationThresholds 设置为 true,则阈值被视为与平均资源使用率的百分比偏差。lowThresholds 将从所有节点的平均值中减去,highThresholds 将添加到平均值中。高于此窗口的资源消耗被视为过度利用的,即热点节点。 | >= v1.1.1 | +| highThresholds | 表示负载水位的目标安全阈值,超过该阈值的节点上的Pod将参与重调度。 | >= v1.1.1 | +| lowThresholds | 表示负载水位的空闲安全水位。低于该阈值的节点上的Pod不会被重调度。 | >= v1.1.1 | + +## 使用负载感知重调度 + +本文示例的集群有3台 4核16GiB 节点。 + +1. 使用下面的 YAML 创建两个 stress Pod + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: stress-demo + namespace: default + labels: + app: stress-demo +spec: + replicas: 2 + selector: + matchLabels: + app: stress-demo + template: + metadata: + name: stress-demo + labels: + app: stress-demo + spec: + containers: + - args: + - '--vm' + - '2' + - '--vm-bytes' + - '1600M' + - '-c' + - '2' + - '--vm-hang' + - '2' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: + limits: + cpu: '2' + memory: 4Gi + requests: + cpu: '2' + memory: 4Gi + restartPolicy: Always + schedulerName: koord-scheduler # use the koord-scheduler +``` + +```bash +$ kubectl create -f stress-demo.yaml +deployment.apps/stress-demo created +``` + +2. 观察 Pod 的状态,直到它们开始运行。 + +```bash +$ kubectl get pod -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +stress-demo-7fdd89cc6b-lml7k 1/1 Running 0 21m 10.0.2.83 cn-beijing.10.0.2.54 +stress-demo-7fdd89cc6b-xr5dl 1/1 Running 0 4m40s 10.0.2.77 cn-beijing.10.0.2.53 +``` + +这些 Pod 调度到了节点 `cn-beijing.10.0.2.53` 和 `cn-beijing.10.0.2.54`. + +3. 检查每个node节点的负载。 + +```bash +$ kubectl top node +NAME CPU(cores) CPU% MEMORY(bytes) MEMORY% +cn-beijing.10.0.2.53 3825m 98% 4051Mi 31% +cn-beijing.10.0.2.54 2155m 55% 4500Mi 35% +cn-beijing.10.0.2.58 182m 4% 1367Mi 10% +``` + +按照输出结果显示, 节点 `cn-beijing.10.0.2.53` 和 `cn-beijing.10.0.2.54` 负载比较高, 节点 `cn-beijing.10.0.2.58` 负载最低。 + +4. 更新配置 `koord-descheduler-config` 启用插件 `LowNodeLoad`。 + +5. 
观察 Pod 变化,等待重调度器执行驱逐迁移操作。 + +```bash +$ kubectl get pod -w +NAME READY STATUS RESTARTS AGE +stress-demo-7fdd89cc6b-lml7k 1/1 Running 0 22m +stress-demo-7fdd89cc6b-xr5dl 1/1 Running 0 5m45s +stress-demo-7fdd89cc6b-xr5dl 1/1 Terminating 0 5m59s +stress-demo-7fdd89cc6b-8k8wq 0/1 Pending 0 0s +stress-demo-7fdd89cc6b-8k8wq 0/1 Pending 0 0s +stress-demo-7fdd89cc6b-8k8wq 0/1 ContainerCreating 0 0s +stress-demo-7fdd89cc6b-8k8wq 0/1 ContainerCreating 0 1s +stress-demo-7fdd89cc6b-8k8wq 1/1 Running 0 3s +``` + +6. 观察Event,可以看到如下迁移记录 + +```bash +$ kubectl get event |grep stress-demo-7fdd89cc6b-xr5dl +74s Normal Evicting podmigrationjob/e54863dc-b651-47e3-9ffd-08b6b4ff64d5 Pod "default/stress-demo-7fdd89cc6b-xr5dl" evicted from node "cn-beijing.10.0.2.53" by the reason "node is overutilized, cpu usage(56.13%)>threshold(50.00%)" +41s Normal EvictComplete podmigrationjob/e54863dc-b651-47e3-9ffd-08b6b4ff64d5 Pod "default/stress-demo-7fdd89cc6b-xr5dl" has been evicted +7m12s Normal Scheduled pod/stress-demo-7fdd89cc6b-xr5dl Successfully assigned default/stress-demo-7fdd89cc6b-xr5dl to cn-beijing.10.0.2.53 +7m12s Normal AllocIPSucceed pod/stress-demo-7fdd89cc6b-xr5dl Alloc IP 10.0.2.77/24 +7m12s Normal Pulling pod/stress-demo-7fdd89cc6b-xr5dl Pulling image "polinux/stress" +6m59s Normal Pulled pod/stress-demo-7fdd89cc6b-xr5dl Successfully pulled image "polinux/stress" in 12.685405843s +6m59s Normal Created pod/stress-demo-7fdd89cc6b-xr5dl Created container stress +6m59s Normal Started pod/stress-demo-7fdd89cc6b-xr5dl Started container stress +74s Normal Descheduled pod/stress-demo-7fdd89cc6b-xr5dl Pod evicted from node "cn-beijing.10.0.2.53" by the reason "node is overutilized, cpu usage(56.13%)>threshold(50.00%)" +73s Normal Killing pod/stress-demo-7fdd89cc6b-xr5dl Stopping container stress +7m13s Normal SuccessfulCreate replicaset/stress-demo-7fdd89cc6b Created pod: stress-demo-7fdd89cc6b-xr5dl +``` diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/load-aware-scheduling.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/load-aware-scheduling.md new file mode 100644 index 000000000..ff33e8560 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/load-aware-scheduling.md @@ -0,0 +1,311 @@ +# 负载感知调度 + +负载感知调度(Load Aware Scheduling) 是 koord-scheduler 提供的一种调度能力,调度 Pod 时根据节点的负载情况选择合适的节点,均衡节点间的负载情况。 + +## 简介 + +负载均衡是资源调度中的常见问题。资源未充分利用的节点会带来很大的资源浪费,而过度使用的节点可能会导致性能下降。这些问题都不能高效的管理和使用资源。 +原生 Kubernetes Scheduler 根据 Requests 和节点可分配总量来调度 Pod,既不考虑实时负载,也不估计使用量。 当我们期望使用原生调度器均匀的打散 Pod 并保持节点间的负载均衡,我们需要为应用程序设置精确的资源规格。此外,当 Koordinator 通过超卖机制提升资源使用效率时,我们需要一种机制尽量避免性能回退,并避免负载过高的问题。 + +koord-scheduler 参考 koordlet 上报的资源利用率数据平衡在线 Pod(LSE/LSR/LS)和离线 Pod(BE)的调度。 + +![图片](/img/load-aware-scheduling-arch.svg) + +想要了解更多信息,请参阅 [设计:负载感知调度](/docs/designs/load-aware-scheduling)。 + +## 设置 + +### 前提条件 + +- Kubernetes >= 1.18 +- Koordinator >= 0.4 + +### 安装 + +请确保 Koordinator 组件已正确安装在你的集群中。 如果没有,请参考[安装文档](/docs/installation)。 + +### 配置全局策略 + +负载感知调度是默认启用的,不需要修改调度器的配置即可使用。 + +对于需要深入定制的用户,可以通过修改 Helm Chart 中的 ConfigMap `koord-scheduler-config` 规则来配置负载感知调度。 + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: koord-scheduler-config + ... +data: + koord-scheduler-config: | + apiVersion: kubescheduler.config.k8s.io/v1beta2 + kind: KubeSchedulerConfiguration + profiles: + - schedulerName: koord-scheduler + plugins: + # enable the LoadAwareScheduling plugin + filter: + enabled: + - name: LoadAwareScheduling + ... 
+ score: + enabled: + - name: LoadAwareScheduling + weight: 1 + ... + reserve: + enabled: + - name: LoadAwareScheduling + ... + pluginConfig: + # configure the thresholds and weights for the plugin + - name: LoadAwareScheduling + args: + apiVersion: kubescheduler.config.k8s.io/v1beta2 + kind: LoadAwareSchedulingArgs + # whether to filter nodes where koordlet fails to update NodeMetric + filterExpiredNodeMetrics: true + # the expiration threshold seconds when using NodeMetric + nodeMetricExpirationSeconds: 300 + # weights of resources + resourceWeights: + cpu: 1 + memory: 1 + # thresholds (%) of resource utilization + usageThresholds: + cpu: 75 + memory: 85 + # thresholds (%) of resource utilization of Prod Pods + prodUsageThresholds: + cpu: 55 + memory: 65 + # enable score according Prod usage + scoreAccordingProdUsage: true + # the factor (%) for estimating resource usage + estimatedScalingFactors: + cpu: 80 + memory: 70 + # enable resource utilization filtering and scoring based on percentile statistics + aggregated: + usageThresholds: + cpu: 65 + memory: 75 + usageAggregationType: "p99" + scoreAggregationType: "p99" +``` + +koord-descheduler 是通过 Configmap 加载[调度器配置](https://kubernetes.io/docs/reference/scheduling/config/)的。因此需要通过重启调度器才能使用最新的配置。 + +| 字段 | 说明 | 版本 | +|-------|-------------| --------| +| filterExpiredNodeMetrics | filterExpiredNodeMetrics 表示是否过滤koordlet更新NodeMetric失败的节点。 默认情况下启用,但在 Helm chart 中,它被禁用。| >= v0.4.0 | +| nodeMetricExpirationSeconds | nodeMetricExpirationSeconds 指示 NodeMetric 过期时间(以秒为单位)。 当 NodeMetrics 过期时,节点被认为是异常的。 默认为 180 秒。| >= v0.4.0 | +| resourceWeights | resourceWeights 表示资源的权重。 CPU 和 Memory 的权重默认都是 1。| >= v0.4.0 | +| usageThresholds | usageThresholds 表示整机的资源利用率阈值。 CPU 的默认值为 65%,内存的默认值为 95%。| >= v0.4.0 | +| estimatedScalingFactors | estimatedScalingFactors 表示估计资源使用时的因子。 CPU 默认值为 85%,Memory 默认值为 70%。| >= v0.4.0 | +| prodUsageThresholds| prodUsageThresholds 表示 Prod Pod 相对于整机的资源利用率阈值。 默认情况下不启用。 | >= v1.1.0 | +| scoreAccordingProdUsage | scoreAccordingProdUsage 控制是否根据 Prod Pod 的利用率进行评分。| >= v1.1.0 | +| aggregated | aggregated 支持基于百分位数统计的资源利用率过滤和评分。| >= v1.1.0 | + +Aggregated 支持的字段: + +| 字段 | 说明 | 版本 | +|-------|-------------| --------| +| usageThresholds | usageThresholds 表示机器基于百分位统计的资源利用率阈值。| >= v1.1.0| +| usageAggregationType | usageAggregationType 表示过滤时机器利用率的百分位类型。 目前支持 `avg`、`p50`、`p90`、`p95` 和 `p99`。 | >= v1.1.0 | +| usageAggregatedDuration | usageAggregatedDuration 表示过滤时机器利用率百分位数的统计周期。不设置该字段时,调度器默认使用 NodeMetrics 中最大周期的数据。| >= v1.1.0| +| scoreAggregationType | scoreAggregationType 表示评分时机器利用率的百分位类型。 目前支持 `avg`、`p50`、`p90`、`p95` 和 `p99`。| >= v1.1.0 +| scoreAggregatedDuration | scoreAggregatedDuration 表示打分时 Prod Pod 利用率百分位的统计周期。 不设置该字段时,调度器默认使用 NodeMetrics 中最大周期的数据。| >= v1.1.0 | + +### 按照节点配置过滤阈值 + +通过插件的配置可以作为集群默认的全局配置,用户也可以通过在节点上附加 annotation 来设置节点维度的负载阈值。 当节点上存在 annotation 时,会根据注解指定的参数进行过滤。 + +Annotation 定义如下: + +```go +const ( + AnnotationCustomUsageThresholds = "scheduling.koordinator.sh/usage-thresholds" +) + +// CustomUsageThresholds supports user-defined node resource utilization thresholds. +type CustomUsageThresholds struct { + // UsageThresholds indicates the resource utilization threshold of the whole machine. 
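	// For example, annotating a node with (the values below are only illustrative):
	//   scheduling.koordinator.sh/usage-thresholds: '{"usageThresholds": {"cpu": 70, "memory": 85}}'
	// overrides the cluster-level thresholds for that node only.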
+ UsageThresholds map[corev1.ResourceName]int64 `json:"usageThresholds,omitempty"` + // ProdUsageThresholds indicates the resource utilization threshold of Prod Pods compared to the whole machine + ProdUsageThresholds map[corev1.ResourceName]int64 `json:"prodUsageThresholds,omitempty"` + // AggregatedUsage supports resource utilization filtering and scoring based on percentile statistics + AggregatedUsage *CustomAggregatedUsage `json:"aggregatedUsage,omitempty"` +} + +type CustomAggregatedUsage struct { + // UsageThresholds indicates the resource utilization threshold of the machine based on percentile statistics + UsageThresholds map[corev1.ResourceName]int64 `json:"usageThresholds,omitempty"` + // UsageAggregationType indicates the percentile type of the machine's utilization when filtering + UsageAggregationType slov1alpha1.AggregationType `json:"usageAggregationType,omitempty"` + // UsageAggregatedDuration indicates the statistical period of the percentile of the machine's utilization when filtering + UsageAggregatedDuration *metav1.Duration `json:"usageAggregatedDuration,omitempty"` +} +``` + +## 使用负载感知调度 + +### 感知整机负载进行调度 + +本文示例的集群有3台 4核16GiB 节点。 + +1. 使用下面的 YAML 创建一个 `stress` Pod + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: stress-demo + namespace: default + labels: + app: stress-demo +spec: + replicas: 1 + selector: + matchLabels: + app: stress-demo + template: + metadata: + name: stress-demo + labels: + app: stress-demo + spec: + containers: + - args: + - '--vm' + - '2' + - '--vm-bytes' + - '1600M' + - '-c' + - '2' + - '--vm-hang' + - '2' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: + limits: + cpu: '2' + memory: 4Gi + requests: + cpu: '2' + memory: 4Gi + restartPolicy: Always + schedulerName: koord-scheduler # use the koord-scheduler +``` + +```bash +$ kubectl create -f stress-demo.yaml +deployment.apps/stress-demo created +``` + +2. 观察 Pod 的状态,直到它开始运行。 + +```bash +$ kubectl get pod -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +stress-demo-7fdd89cc6b-gcnzn 1/1 Running 0 82s 10.0.3.114 cn-beijing.10.0.3.112 +``` + +Pod `stress-demo-7fdd89cc6b-gcnzn` 调度在 `cn-beijing.10.0.3.112`。 + +3. 检查每个node节点的负载。 + +```bash +$ kubectl top node +NAME CPU(cores) CPU% MEMORY(bytes) MEMORY% +cn-beijing.10.0.3.110 92m 2% 1158Mi 9% +cn-beijing.10.0.3.111 77m 1% 1162Mi 9% +cn-beijing.10.0.3.112 2105m 53% 3594Mi 28% +``` +按照输出结果显示,节点 `cn-beijing.10.0.3.111` 负载最低,节点`cn-beijing.10.0.3.112` 的负载最高。 + +4. 使用下面的 YAML 文件部署 `nginx` deployment。 + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-with-loadaware + labels: + app: nginx +spec: + replicas: 6 + selector: + matchLabels: + app: nginx + template: + metadata: + name: nginx + labels: + app: nginx + spec: + schedulerName: koord-scheduler # use the koord-scheduler + containers: + - name: nginx + image: nginx + resources: + limits: + cpu: 500m + requests: + cpu: 500m +``` + +```bash +$ kubectl create -f nginx-with-loadaware.yaml +deployment/nginx-with-loadawre created +``` + +5. 
检查 `nginx` Pods 的调度结果。 + +```bash +$ kubectl get pods | grep nginx +nginx-with-loadaware-5646666d56-224jp 1/1 Running 0 18s 10.0.3.118 cn-beijing.10.0.3.110 +nginx-with-loadaware-5646666d56-7glt9 1/1 Running 0 18s 10.0.3.115 cn-beijing.10.0.3.110 +nginx-with-loadaware-5646666d56-kcdvr 1/1 Running 0 18s 10.0.3.119 cn-beijing.10.0.3.110 +nginx-with-loadaware-5646666d56-qzw4j 1/1 Running 0 18s 10.0.3.113 cn-beijing.10.0.3.111 +nginx-with-loadaware-5646666d56-sbgv9 1/1 Running 0 18s 10.0.3.120 cn-beijing.10.0.3.111 +nginx-with-loadaware-5646666d56-z79dn 1/1 Running 0 18s 10.0.3.116 cn-beijing.10.0.3.111 +``` + +现在我们可以看到 `nginx` pods 被调度在 `cn-beijing.10.0.3.112` (负载最高的节点) 以外的节点上。 + +### 感知 Prod Pods 的负载进行调度 + +如果一个 Node 中调度了很多 BestEffort Pod,可能会因为节点的负载已达到使用限制而导致延迟敏感的 Pod 无法调度。 在 Koordinator v1.1.0 中,负载感知调度针对这种场景进行了优化。 对于延迟敏感(LSE/LSR/LS)的 Pod,优先调度到 Prod Pod 总利用率较低的节点,而 BestEffort(BE) Pod 根据整机利用率水平进行调度。 + +通过设置以下参数启用相关优化: + +| 字段 | 说明 | 版本 | +|-------|-------------| --------| +| prodUsageThresholds| prodUsageThresholds 表示 Prod Pod 相对于整机的资源利用率阈值。 默认情况下不启用。 | >= v1.1.0 | +| scoreAccordingProdUsage | scoreAccordingProdUsage 控制是否根据 Prod Pod 的利用率进行评分。| >= v1.1.0 | + +### 感知基于百分位数统计的利用率进行调度 + +Koordinator v1.0及以前的版本都是按照 koordlet 上报的平均利用率数据进行过滤和打分。但平均值隐藏了比较多的信息,因此在 Koordinator v1.1 中 koordlet 新增了根据百分位数统计的利用率聚合数据。调度器侧也跟着做了相应的适配。 + +通过设置以下参数启用相关优化: + +| 字段 | 说明 | 版本 | +|-------|-------------| --------| +| aggregated | aggregated 支持基于百分位数统计的资源利用率过滤和评分。| >= v1.1.0 | + +Aggregated 支持的字段: + +| 字段 | 说明 | 版本 | +|-------|-------------| --------| +| usageThresholds | usageThresholds 表示机器基于百分位统计的资源利用率阈值。| >= v1.1.0| +| usageAggregationType | usageAggregationType 表示过滤时机器利用率的百分位类型。 目前支持 `avg`、`p50`、`p90`、`p95` 和 `p99`。 | >= v1.1.0 | +| usageAggregatedDuration | usageAggregatedDuration 表示过滤时机器利用率百分位数的统计周期。不设置该字段时,调度器默认使用 NodeMetrics 中最大周期的数据。| >= v1.1.0| +| scoreAggregationType | scoreAggregationType 表示评分时机器利用率的百分位类型。 目前支持 `avg`、`p50`、`p90`、`p95` 和 `p99`。| >= v1.1.0 +| scoreAggregatedDuration | scoreAggregatedDuration 表示打分时 Prod Pod 利用率百分位的统计周期。 不设置该字段时,调度器默认使用 NodeMetrics 中最大周期的数据。| >= v1.1.0 | + +`aggregated` 和 `usageThresholds` 参数是互斥的。 当两者都配置时,将使用 `aggregated`。此外,目前不支持 Pod 类型感知。 \ No newline at end of file diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/memory-evict.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/memory-evict.md new file mode 100644 index 000000000..062625c9a --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/memory-evict.md @@ -0,0 +1,122 @@ +# 基于内存用量的驱逐策略 + +## 简介 + +Koordinator支持了将节点空闲资源动态超卖给低优先级Pod,在混部场景下,节点实际的内存资源用量时刻在变化,对于内存这类不可压缩类型的资源, +当节点资源用量较高时,可能会引发整机内存OOM,导致高优先级Pod的进程被kill。为防止这一情况发生,Koordiantor提供了基于单机内存用量的驱逐策略。 +单机组件Koordlet会以秒级粒度持续探测整机内存的用量情况(Total-Available),当整机资源内存用量较高时,会将低优先级的BE类型Pod驱逐, +保障高优先级Pod的服务质量。在驱逐过程中会首先选择优先级(Pod.Spec.Priority)更低的Pod进行驱逐,若优先级相同, +则优先驱逐内存资源用量更多的Pod,直至整机内存用量降低到配置的安全水位(evictThreshold)以下。 + +![image](/img/memory-evict.svg) + +## 使用限制 +请确保Koordinator已正确安装在你的集群中。若未安装,请参考[安装文档](https://koordinator.sh/docs/installation),所需的版本要求情况如下: + +| 组件 | 版本要求 | +| --- | ------- | +| Kubernetes | ≥v1.18 | +| koordinator | ≥v0.3.0 | + +该功能由单机组件Koordlet提供,对应的feature-gate默认关闭,使用前请确保koordlet的启动参数`-feature-gates`中已经添加了`BEMemoryEvict=true`, +详见[参考示例](https://github.com/koordinator-sh/charts/blob/main/versions/v1.2.0/templates/koordlet.yaml#L36)。 + +## 操作步骤 + +1. 
使用以下ConfigMap,创建configmap.yaml文件 + ```yaml + #ConfigMap slo-controller-config 样例。 + apiVersion: v1 + kind: ConfigMap + metadata: + name: slo-controller-config # 以koord-manager实际配置的名字为准,例如ack-slo-config + namespace: koordinator-system # 命名空间以环境中实际安装的情况为准,例如kube-system + data: + # 开启基于内存用量的驱逐功能。 + resource-threshold-config: | + { + "clusterStrategy": { + "enable": true, + "memoryEvictThresholdPercent": 70 + } + } + ``` + + | 参数 | 类型 | 取值范围 | 说明 | + | :-------------- | :------ | :-------- | :----------------------------------------------------------- | + | `enable` | Boolean | true; false | true:集群全局开启单机内存驱逐策略。false(默认值):集群全局关闭单机内存驱逐策略。 | + | `memoryEvictThresholdPercent` | Int | 0~100 | 整机内存资源用量百分比水位,表示触发驱逐的内存阈值,默认值为70。 | + +2. 查看安装的命名空间下是否存在ConfigMap,以命名空间`koordinator-system`和ConfigMap名字`slo-controller-config`为例,具体以实际安装配置为准。 + + - 若存在ConfigMap `slo-controller-config`,请使用PATCH方式进行更新,避免干扰ConfigMap中其他配置项。 + + ```bash + kubectl patch cm -n koordinator-system slo-controller-config --patch "$(cat configmap.yaml)" + ``` + + - 若不存在ConfigMap `slo-controller-config`,请执行以下命令进行创建Configmap。 + + ```bash + kubectl apply -f configmap.yaml + ``` + +3. 使用以下YAML内容,创建be-pod-demo.yaml文件。 + + ```yaml + apiVersion: v1 + kind: Pod + metadata: + name: be-pod-demo + labels: + koordinator.sh/qosClass: 'BE' #指定Pod的QoS级别为BE。 + spec: + containers: + - args: + - '-c' + - '1' + - '--vm' + - '1' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + restartPolicy: Always + schedulerName: default-scheduler + # 当ColocationProfile功能开启时(默认启用),priorityClassName是必填的 + priorityClassName: koord-batch + ``` + +4. 执行以下命令,将be-pod-demo部署到集群。 + + ```bash + $ kubectl apply -f be-pod-demo.yaml + ``` + +5. 执行以下命令,查看be-pod-demo状态,等待Pod启动完成。 + + ```bash + $ kubectl get pod be-pod-demo + NAME READY STATUS RESTARTS AGE + be-pod-demo 1/1 Running 0 7s + ``` + +6. 在节点执行以下命令,使用[stress工具](https://linux.die.net/man/1/stress)启动进程, +确保整机内存资源用量被提升到驱逐水位以上,其中`--vm-bytes`参数表示stress进程占用的内存量10GB,测试时可根据实际机型情况进行调整。 + + ```bash + $ stress --cpu 1 --vm 1 --vm-bytes 10G --vm-keep + ``` + +7. 观察be-pod-demo运行情况,可以发现be-pod-demo已经不存在,驱逐信息可以通过event查看到。 + + ```bash + $ kubectl get pod be-pod-demo + Error from server (NotFound): pods "be-pod-demo" not found + + $ kubectl get event + LAST SEEN TYPE REASON OBJECT MESSAGE + 46s Normal Killing pod/be-pod-demo Stopping container stress + 48s Warning evictPodSuccess $you-pod-object evict Pod:be-pod-demo, reason: EvictPodByNodeMemoryUsage, message: killAndEvictBEPods for node(${your-node-id}), need to release memory: 8077889699 + ``` \ No newline at end of file diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/memory-qos.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/memory-qos.md new file mode 100644 index 000000000..66f5e60f9 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/memory-qos.md @@ -0,0 +1,355 @@ +# Memory QoS + +## Introduction + +The Koordlet provides the *Memory Quality of Service* (QoS) feature for containers. You can use this feature to +optimize the performance of memory-sensitive applications while ensuring fair memory scheduling among containers. This +topic describes how to enable the memory QoS feature for containers. + +### Background + +The following memory limits apply to containers: + +- The memory limit of the container. 
If the amount of memory that a container uses, including the page cache, is about + to reach the memory limit of the container, the memory reclaim mechanism of the OS kernel is triggered. As a result, + the application in the container may not be able to request or release memory resources as normal. +- The memory limit of the node. If the memory limit of a container is greater than the memory request of the container, + the container can overcommit memory resources. In this case, the available memory on the node may become insufficient. + This causes the OS kernel to reclaim memory from containers. As a result, the performance of your application is + downgraded. In extreme cases, the node cannot run as normal. + +To improve the performance of applications and the stability of nodes, Koordinator provides the memory QoS feature for +containers. We recommend that you use Anolis OS as the node OS. For other OS, we will try our best to adapt, and users +can still enable it without side effects. After you enable the memory QoS feature for a container, Koordlet +automatically configures the memory control group (memcg) based on the configuration of the container. This helps you +optimize the performance of memory-sensitive applications while ensuring fair memory scheduling on the node. + +Memory QoS provides the following optimizations to improve the memory utilization of pods: + +- When the memory used by a pod is about to reach the memory limit of the pod, the memcg performs asynchronous reclaim for a specific amount of memory. This prevents the reclaim of all the memory that the pod uses and therefore minimizes the adverse impact on the application performance caused by direct memory reclaim. +- Memory reclaim is performed in a fairer manner among pods. When the available memory on a node becomes insufficient, memory reclaim is first performed on pods that use more memory than their memory requests. This ensures sufficient memory on the node when a pod applies for a large amount of memory. +- If the BestEffort pods on a node use more memory than their memory requests, the system prioritizes the memory requirements of Guaranteed pods and Burstable pods over the memory requirements of BestEffort pods. + +![image](/img/memory-qos.png) + +## Setup + +### Prerequisite + +- Kubernetes >= 1.18 +- Koordinator >= 0.3 + +### Installation + +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to +[Installation](/docs/installation). + +### Configurations + +Koordlet has already enabled Memory QoS feature (`-feature-gates=AllAlpha=true`). +If not, please enable it manually by updating the feature gate in the koordlet daemonset. + +> NOTE: Memory QoS is controlled by the `CgroupReconcile` feature-gate. + +```yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: koordlet +spec: + selector: + matchLabels: + koord-app: koordlet + template: + metadata: + labels: + koord-app: koordlet + spec: + containers: + - command: + - /koordlet + args: + - -CgroupRootDir=/host-cgroup/ + - -feature-gates=XXXX,CgroupReconcile=true # enable CPU Burst feature + ... +``` + +## Use Memory QoS + +When you enable memory QoS for the containers in a pod, the memcg is automatically configured based on the specified +ratios and pod parameters. To enable memory QoS for the containers in a pod, perform the following steps. 
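+
+To make the effect of these parameters concrete before walking through the steps, the worked example below reuses the numbers quoted in the "(Optional) Advanced Settings" table later in this section (100 MiB request and limit, `minLimitPercent=100`, `throttlingPercent=80`, `wmarkRatio=95`); it is only an illustration of the documented formulas, not output from koordlet:
+
+```bash
+# Example values taken from the advanced settings table below:
+# memory request = memory limit = 100 MiB = 104857600 bytes
+echo $(( 104857600 * 100 / 100 ))  # memory.min        = request * minLimitPercent / 100   -> 104857600
+echo $(( 104857600 * 80 / 100 ))   # memory.high       = limit * throttlingPercent / 100   -> 83886080
+echo $(( 83886080 * 95 / 100 ))    # memory.wmark_high = memory.high * wmarkRatio / 100    -> 79691776
+```
+
+Since koordlet writes the resulting values into the container's memcg, the same arithmetic can be used to sanity-check a node after the feature is enabled.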
+ +### Use an annotation to enable Memory QoS for the pod + +Add the following annotations to enable memory QoS for the containers in a pod: + +```yaml +annotations: + # To enable memory QoS for the containers in a pod, set the value to auto. + koordinator.sh/memoryQOS: '{"policy": "auto"}' + # To disable memory QoS for the containers in a pod, set the value to none. + #koordinator.sh/memoryQOS: '{"policy": "none"}' +``` + +### Use a ConfigMap to enable memory QoS for all the containers in a cluster + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: koordinator-system +data: + resource-qos-config: |- + { + "clusterStrategy": { + "lsClass": { + "memoryQOS": { + "enable": true + } + }, + "beClass": { + "memoryQOS": { + "enable": true + } + } + } + } +``` + +### (Optional) Advanced Settings + +The following table describes the advanced parameters that you can use to configure fine-grained memory QoS +configurations at the pod level and cluster level. + +| Parameter | Data type | Valid value | Description | +| ------------------- | ----------- | --------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| enable | Boolean |
 • true<br/> • false | • true: enables memory QoS for all the containers in a cluster. The default memory QoS settings for the QoS class of the containers are used.<br/> • false: disables memory QoS for all the containers in a cluster. The memory QoS settings are restored to the original settings for the QoS class of the containers. |
+| policy              | String      | • auto<br/> • default<br/> • none | • auto: enables memory QoS for the containers in the pod and uses the recommended memory QoS settings. The recommended memory QoS settings are prioritized over the cluster-wide memory QoS settings.<br/> • default: specifies that the pod inherits the cluster-wide memory QoS settings.<br/> • none: disables memory QoS for the pod. The relevant memory QoS settings are restored to the original settings. The original settings are prioritized over the cluster-wide memory QoS settings.
| +| minLimitPercent | Int | 0~100 | Unit: %. Default value:`0`. The default value indicates that this parameter is disabled. This parameter specifies the unreclaimable proportion of the memory request of a pod. The amount of unreclaimable memory is calculated based on the following formula: `Value of memory.min = Memory request × Value of minLimitPercent/100`. This parameter is suitable for scenarios where applications are sensitive to the page cache. You can use this parameter to cache files to optimize read and write performance. For example, if you specify Memory `Request=100MiB` and `minLimitPercent=100` for a container, `the value of memory.min is 104857600`. | +| lowLimitPercent | Int | 0~100 | Unit: %. Default value:`0`. The default value indicates that this parameter is disabled. This parameter specifies the relatively unreclaimable proportion of the memory request of a pod. The amount of relatively unreclaimable memory is calculated based on the following formula: `Value of memory.low = Memory request × Value of lowLimitPercent/100`. For example, if you specify `Memory Request=100MiB` and `lowLimitPercent=100` for a container, `the value of memory.low is 104857600`. | +| throttlingPercent | Int | 0~100 | Unit: %. Default value:`0`. The default value indicates that this parameter is disabled. This parameter specifies the memory throttling threshold for the ratio of the memory usage of a container to the memory limit of the container. The memory throttling threshold for memory usage is calculated based on the following formula: `Value of memory.high = Memory limit × Value of throttlingPercent/100`. If the memory usage of a container exceeds the memory throttling threshold, the memory used by the container will be reclaimed. This parameter is suitable for container memory overcommitment scenarios. You can use this parameter to cgroups from triggering OOM. For example, if you specify `Memory Limit=100MiB` and `throttlingPercent=80` for a container, `the value of memory.high is 83886080`, which is equal to 80 MiB. | +| wmarkRatio | Int | 0~100 | Unit: %. Default value:`95`. A value of `0` indicates that this parameter is disabled. This parameter specifies the threshold of the usage of the memory limit or the value of `memory.high` that triggers asynchronous memory reclaim. If `throttlingPercent` is disabled, the asynchronous memory reclaim threshold for memory usage is calculated based on the following formula: `Value of memory.wmark_high = Memory limit × wmarkRatio/100`. If `throttlingPercent` is enabled, the asynchronous memory reclaim threshold for memory usage is calculated based on the following formula: `Value of memory.wmark_high = Value of memory.high × wmarkRatio/100`. If the usage of the memory limit or the value of memory.high exceeds the threshold, the memcg backend asynchronous reclaim feature is triggered. For example, if you specify `Memory Limit=100MiB`for a container, the memory throttling setting is`memory.high=83886080`, the reclaim ratio setting is `memory.wmark_ratio=95`, and the reclaim threshold setting is `memory.wmark_high=79691776`. | +| wmarkMinAdj | Int | -25~50 | Unit: %. The default value is `-25` for the `LS`/ `LSR` QoS class and `50` for the `BE` QoS class. A value of 0 indicates that this parameter is disabled. This parameter specifies the adjustment to the global minimum watermark for a container. A negative value decreases the global minimum watermark and therefore postpones memory reclaim for the container. 
A positive value increases the global minimum watermark and therefore antedates memory reclaim for the container. For example, if you create a pod whose QoS class is LS, the default setting of this parameter is `memory.wmark_min_adj=-25`, which indicates that the minimum watermark is decreased by 25% for the containers in the pod. | + +### Example + +0. The testing environment is shown below: + +- Kubernetes: 1.20 +- Nodes: + - Stress Node: an ECS instance (8 vCPU, 32GB RAM) for performing stress tests. + - Tested Node: an ECS instance (8 vCPU, 32GB RAM) runs the workload and serves. + +1. Create a file named redis-demo.yaml with the following YAML template: + +```yaml +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: redis-demo-config +data: + redis-config: | + appendonly yes + appendfsync no +--- +apiVersion: v1 +kind: Pod +metadata: + name: redis-demo + labels: + name: redis-demo + annotations: + koordinator.sh/memoryQOS: '{"policy": "auto"}' # Add this annotation to enable memory QoS + koordinator.sh/qosClass: 'LS' # Set the QoS class of the Redis pod to LS +spec: + containers: + - name: redis + image: redis:5.0.4 + command: + - redis-server + - "/redis-master/redis.conf" + env: + - name: MASTER + value: "true" + ports: + - containerPort: 6379 + resources: + limits: + cpu: "2" + memory: "6Gi" + requests: + cpu: "2" + memory: "2Gi" + volumeMounts: + - mountPath: /redis-master-data + name: data + - mountPath: /redis-master + name: config + volumes: + - name: data + emptyDir: {} + - name: config + configMap: + name: redis-demo-config + items: + - key: redis-config + path: redis.conf + nodeName: # Set nodeName to the name of the tested node +--- +apiVersion: v1 +kind: Service +metadata: + name: redis-demo +spec: + ports: + - name: redis-port + port: 6379 + protocol: TCP + targetPort: 6379 + selector: + name: redis-demo + type: ClusterIP +``` + +2. Run the following command to deploy Redis Server as the test application. + +You can access the redis-demo Service from within the cluster. + +```bash +kubectl apply -f redis-demo.yaml +``` + +3. Simulate the scenario of memory overcommitment. + +Use the Stress tool to increase the load on memory and trigger memory reclaim. The sum of the memory limits of all pods +on the node exceeds the physical memory of the node. + + a. Create a file named stress-demo.yaml with the following YAML template: + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: stress-demo + labels: + name: stress-demo + annotations: + koordinator.sh/memoryQOS: '{"policy": "auto"}' # Add this annotation to enable memory QoS + koordinator.sh/qosClass: 'BE' # Set the QoS class of the Stress pod to BE +spec: + containers: + - args: + - '--vm' + - '2' + - '--vm-bytes' + - 11G + - '-c' + - '2' + - '--vm-hang' + - '2' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + restartPolicy: Always + nodeName: # Set nodeName to the name of the tested node, which is the node on which the Redis pod is deployed +``` + + b. Run the following command to deploy stress-demo: + +```bash +kubectl apply -f stress-demo.yaml +``` + +4. Run the following command to query the global minimum watermark of the node: + +> Note In memory overcommitment scenarios, if the global minimum watermark of the node is set to a low value, OOM +> killers may be triggered for all pods on the node even before memory reclaim is performed. Therefore, we recommend +> that you set the global minimum watermark to a high value. 
In this example, the global minimum watermark is set +> to 4,000,000 KB for the tested node that has 32 GiB of memory. + +```bash +cat /proc/sys/vm/min_free_kbytes +``` + +Expected output: + +```bash +4000000 +``` + +5. Use the following YAML template to deploy the memtier-benchmark tool to send requests to the tested node: + +```yaml +apiVersion: v1 +kind: Pod +metadata: + labels: + name: memtier-demo + name: memtier-demo +spec: + containers: + - command: + - memtier_benchmark + - '-s' + - 'redis-demo' + - '--data-size' + - '200000' + - "--ratio" + - "1:4" + image: 'redislabs/memtier_benchmark:1.3.0' + name: memtier + restartPolicy: Never + nodeName: # Set nodeName to the name of the stress node that is used to send requests. +``` + +6. Run the following command to query the test results from memtier-benchmark: + +```bash +kubectl logs -f memtier-demo +``` + +7. Use the following YAML template to disable memory QoS for the Redis pod and Stress pod. Then, perform stress tests +again and compare the results. + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: redis-demo + labels: + name: redis-demo + annotations: + koordinator.sh/memoryQOS: '{"policy": "none"}' # Disable memory QoS. + koordinator.sh/qosClass: 'LS' +spec: + ... + +--- +apiVersion: v1 +kind: Pod +metadata: + name: stress-demo + labels: + name: stress-demo + annotations: + koordinator.sh/memoryQOS: '{"policy": "none"}' # Disable memory QoS. + koordinator.sh/qosClass: 'BE' +``` + +8. Check the results of Memory QoS enabled and disabled. + +- Disabled: Set the memory QoS policy of the pod to `none`. +- Enabled: Set the memory QoS policy of the pod to `auto` (the recommended parameters of memory QoS are used). + +| Metric | Disabled | Enabled | +| ----------------- | ------------- | ------------- | +| Latency-avg | 51.32 ms | 47.25 ms | +| Throughput-avg | 149.0 MB/s | 161.9 MB/s | + +The table shows that the latency of the Redis pod is reduced by 7.9% and the throughput of the Redis pod is increased +by 8.7% after memory QoS is enabled. This indicates that the memory QoS feature can optimize the performance of +applications in memory overcommitment scenarios. diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/multi-hierarchy-elastic-quota-management.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/multi-hierarchy-elastic-quota-management.md new file mode 100644 index 000000000..e5de06fcb --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/multi-hierarchy-elastic-quota-management.md @@ -0,0 +1,621 @@ +# Multi Hierarchy Elastic Quota Management + +Multi Hierarchy ElasticQuota Management is an ability of koord-scheduler to manage different user's resource usage in a shared-cluster. + +## Introduction +When several users or teams share a cluster, fairness of resource allocation is very important. the Koordinator provides +multi-hierarchy elastic quota management mechanism for the scheduler. +- It supports configuring quota groups in a tree structure, which is similar to the organizational structure of most companies. +- It supports the borrowing / returning of resources between different quota groups, for better resource utilization efficiency. +The busy quota groups can automatically temporarily borrow the resources from the idle quota groups, which can improve the +utilization of the cluster. At the same time, when the idle quota group turn into the busy quota group, it can also automatically +take back the "lent-to" resources. 
+- It considers the resource fairness between different quota groups. When the busy quota groups borrow the +resources from the idle quota groups, the resources can be allocated to the busy quota groups under some fair rules. + +## Setup + +### Prerequisite + +- Kubernetes >= 1.18 +- Koordinator >= 0.71 + +### Installation + +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to [Installation](/docs/installation). + +### Configurations + +Multi-Hierarchy-ElasticQuota-Management is *Enabled* by default. You can use it without any modification on the koord-descheduler config. + +## Use Multi-Hierarchy-ElasticQuota-Management + +### Quick Start by Label + +1.Create a Deployment `quota-example` with the YAML file below. + +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-example + namespace: default + labels: + quota.scheduling.koordinator.sh/parent: "" + quota.scheduling.koordinator.sh/is-parent: "false" +spec: + max: + cpu: 40 + memory: 40Gi + min: + cpu: 10 + memory: 20Mi +``` + +```bash +$ kubectl apply -f quota-example.yaml + elasticquota.scheduling.sigs.k8s.io/quota-example created + +$ kubectl get eqs -n default + NAME AGE + test-d 2s +``` + +2.Create a pod `pod-example` with the YAML file below. +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example + namespace: default + labels: + quota.scheduling.koordinator.sh/name: "quota-example" +spec: + schedulerName: koord-scheduler + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: 40m + memory: 40Mi + requests: + cpu: 40m + memory: 40Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always +``` + +```bash +$ kubectl apply -f pod-example.yaml + pod/pod-example created +``` + +3.Verify `quota-example` has changed. +```bash +$ kubectl get eqs -n default quota-example -o yaml +``` +```yaml +kind: ElasticQuota +metadata: + annotations: + quota.scheduling.koordinator.sh/request: '{"cpu":"40m","memory":"40Mi"}' + quota.scheduling.koordinator.sh/runtime: '{"cpu":"40m","memory":"40Mi"}' + quota.scheduling.koordinator.sh/shared-weight: '{"cpu":"40","memory":"40Gi"}' + creationTimestamp: "2022-10-08T09:26:38Z" + generation: 2 + labels: + quota.scheduling.koordinator.sh/is-parent: "false" + quota.scheduling.koordinator.sh/parent: root + manager: koord-scheduler + operation: Update + time: "2022-10-08T09:26:50Z" + name: quota-example + namespace: default + resourceVersion: "39012008" +spec: + max: + cpu: "40" + memory: 40Gi + min: + cpu: "10" + memory: 20Mi +status: + used: + cpu: 40m + memory: 40Mi +``` + +### Quick Start by Namespace +1.Create namespace +```bash +$ kubectl create ns quota-example + namespace/quota-example created +``` + +2.Create a Deployment `quota-example` with the YAML file below. + +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-example + namespace: quota-example + labels: + quota.scheduling.koordinator.sh/parent: "" + quota.scheduling.koordinator.sh/is-parent: "false" +spec: + max: + cpu: 40 + memory: 40Gi + min: + cpu: 10 + memory: 20Mi +``` + +```bash +$ kubectl apply -f quota-example.yaml + elasticquota.scheduling.sigs.k8s.io/quota-example created + +$ kubectl get eqs -n quota-example + NAME AGE + test-d 2s +``` + +2.Create a pod `pod-example` with the YAML file below. 
+```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example + namespace: quota-example +spec: + schedulerName: koord-scheduler + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: 40m + memory: 40Mi + requests: + cpu: 40m + memory: 40Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always +``` + +```bash +$ kubectl apply -f pod-example.yaml + pod/pod-example created +``` + +3.Verify `quota-example` has changed. +```bash +$ kubectl get eqs -n quota-example quota-example -o yaml +``` +```yaml +kind: ElasticQuota +metadata: + annotations: + quota.scheduling.koordinator.sh/request: '{"cpu":"40m","memory":"40Mi"}' + quota.scheduling.koordinator.sh/runtime: '{"cpu":"40m","memory":"40Mi"}' + quota.scheduling.koordinator.sh/shared-weight: '{"cpu":"40","memory":"40Gi"}' + creationTimestamp: "2022-10-08T09:26:38Z" + generation: 2 + labels: + quota.scheduling.koordinator.sh/is-parent: "false" + quota.scheduling.koordinator.sh/parent: root + manager: koord-scheduler + operation: Update + time: "2022-10-08T09:26:50Z" + name: quota-example + namespace: quota-example + resourceVersion: "39012008" +spec: + max: + cpu: "40" + memory: 40Gi + min: + cpu: "10" + memory: 20Mi +status: + used: + cpu: 40m + memory: 40Mi +``` + +### Quota Debug Api. +```bash +$ kubectl -n koordinator-system get lease koord-scheduler --no-headers | awk '{print $2}' | cut -d'_' -f1 | xargs -I {} kubectl -n koordinator-system get pod {} -o wide --no-headers | awk '{print $6}' + 10.244.0.64 + +$ curl 10.244.0.64:10251/apis/v1/plugins/ElasticQuota/quota/quota-example +``` + +```json +{ + "allowLentResource": true, + "autoScaleMin": { + "cpu": "10", + "memory": "20Mi", + }, + "isParent": false, + "max": { + "cpu": "40", + "memory": "40Gi", + }, + "min": { + "cpu": "10", + "memory": "20Mi", + }, + "name": "quota-example", + "parentName": "root", + "podCache": { + "pod-example": { + "isAssigned": true, + "resource": { + "cpu": "40m", + "memory": "40Mi" + } + } + }, + "request": { + "cpu": "40m", + "memory": "40Mi" + }, + "runtime": { + "cpu": "40m", + "memory": "41943040", + }, + "runtimeVersion": 39, + "sharedWeight": { + "cpu": "40", + "memory": "40Gi", + }, + "used": { + "cpu": "40m", + "memory": "40Mi" + } +} +``` +The main different with yaml is that we can find all quota's pods and its status in `podCache`. + +### Advanced Configurations +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-example + namespace: default + labels: + quota.scheduling.koordinator.sh/is-parent: false + quota.scheduling.koordinator.sh/parent: "parent" + quota.scheduling.koordinator.sh/allow-lent-resource: true + quota.scheduling.koordinator.sh/shared-weight: '{"cpu":"40","memory":"40Gi"}' +spec: + max: + cpu: 40 + memory: 40Gi + min: + cpu: 10 + memory: 20Mi +``` + +- `quota.scheduling.koordinator.sh/is-parent` is disposed by the user. It reflects the "child\parent" attribute of the quota group. Default is child. +- `quota.scheduling.koordinator.sh/parent` is disposed by the user. It reflects the parent quota name. Default is root. +- `quota.scheduling.koordinator.sh/shared-weight` is disposed by the user. It reflects the ability to share the "lent to" resource. Default equals to "max". +- `quota.scheduling.koordinator.sh/allow-lent-resource` is disposed by the user. It reflects whether quota group allows lent unused "min" to others. 
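+
+Putting the four labels together, the sketch below shows one possible two-level tree: a parent group `quota-team` whose `min` is split between two child groups. All names and resource figures here are illustrative assumptions, not values shipped with the installation, and whether such a tree is accepted is still subject to the webhook checks described in the next section (for example, the children's total `min` must not exceed the parent's `min`, and the resource keys of `min`/`max` must match the parent's).
+
+```yaml
+# Illustrative parent group; parent groups only organize quota and cannot run pods.
+apiVersion: scheduling.sigs.k8s.io/v1alpha1
+kind: ElasticQuota
+metadata:
+  name: quota-team
+  namespace: default
+  labels:
+    quota.scheduling.koordinator.sh/is-parent: "true"
+spec:
+  max:
+    cpu: 40
+    memory: 40Gi
+  min:
+    cpu: 20
+    memory: 20Gi
+---
+# Child group for dev workloads; it may lend its unused "min" to siblings.
+apiVersion: scheduling.sigs.k8s.io/v1alpha1
+kind: ElasticQuota
+metadata:
+  name: quota-team-dev
+  namespace: default
+  labels:
+    quota.scheduling.koordinator.sh/is-parent: "false"
+    quota.scheduling.koordinator.sh/parent: "quota-team"
+    quota.scheduling.koordinator.sh/allow-lent-resource: "true"
+spec:
+  max:
+    cpu: 40
+    memory: 40Gi
+  min:
+    cpu: 10
+    memory: 10Gi
+---
+# Child group for test workloads; the two children's min (10 + 10) stays within the parent's min (20).
+apiVersion: scheduling.sigs.k8s.io/v1alpha1
+kind: ElasticQuota
+metadata:
+  name: quota-team-test
+  namespace: default
+  labels:
+    quota.scheduling.koordinator.sh/is-parent: "false"
+    quota.scheduling.koordinator.sh/parent: "quota-team"
+    quota.scheduling.koordinator.sh/allow-lent-resource: "true"
+spec:
+  max:
+    cpu: 40
+    memory: 40Gi
+  min:
+    cpu: 10
+    memory: 10Gi
+```
+
+Pods are then attached to a leaf group with the `quota.scheduling.koordinator.sh/name` label, as in the quick start above.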
+ +### WebHook Verify +1.Except for the first level quota group, we require that the sum of "min" of all sub quota groups should be less than or +equal to the "min" of parent group. + +first create parent quota: +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-parent-example + namespace: default + labels: + quota.scheduling.koordinator.sh/is-parent: true +spec: + max: + cpu: 40 + memory: 40Gi + min: + cpu: 10 + memory: 20Mi +``` + +then create child quota: +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-example + namespace: default + labels: + quota.scheduling.koordinator.sh/is-parent: false + quota.scheduling.koordinator.sh/parent: "quota-parent-example" +spec: + max: + cpu: 40 + memory: 40Gi + min: + cpu: 20 + memory: 20Mi +``` + +```bash +kubectl apply -f quota-example.yaml +Error from server: error when creating "quota-example.yaml": admission webhook "vquota.kb.io" denied the request: checkMinQuotaSum allChildren SumMinQuota > parentMinQuota, parent: quota-parent-example +``` + +2.Parent and child's min\max resource key must same. +first create parent quota: +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-parent-example + namespace: default + labels: + quota.scheduling.koordinator.sh/is-parent: true +spec: + max: + cpu: 40 + memory: 40Gi + min: + cpu: 10 + memory: 20Mi +``` + +then create child quota: +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-example + namespace: default + labels: + quota.scheduling.koordinator.sh/is-parent: false + quota.scheduling.koordinator.sh/parent: "quota-parent-example" +spec: + max: + cpu: 40 + memory: 40Gi + test: 200 + min: + cpu: 10 + memory: 20Mi +``` + +```bash +$ kubectl apply -f quota-example.yaml + Error from server: error when creating "quota-example.yaml": admission webhook "vquota.kb.io" denied the request: checkSubAndParentGroupMaxQuotaKeySame failed: quota-parent-example's key is not the same with quota-example +``` + +3.Parent group cannot run pod. + +first create parent quota: +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-parent-example + namespace: default + labels: + quota.scheduling.koordinator.sh/is-parent: true +spec: + max: + cpu: 40 + memory: 40Gi + min: + cpu: 10 + memory: 20Mi +``` + +then create pod: +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example + namespace: default + labels: + quota.scheduling.koordinator.sh/name: "quota-parent-example" +spec: + schedulerName: koord-scheduler + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: 40m + memory: 40Mi + requests: + cpu: 40m + memory: 40Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always +``` + +```bash +$ kubectl apply -f pod-example_xb.yaml + Error from server: error when creating "pod-example.yaml": admission webhook "vpod.kb.io" denied the request: pod can not be linked to a parentQuotaGroup,quota:quota-parent-example, pod:pod-example +``` + +4.The parent of node can only be parent group, not child group. 
+ +first create parent quota: +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-parent-example + namespace: default + labels: + quota.scheduling.koordinator.sh/is-parent: false +spec: + max: + cpu: 40 + memory: 40Gi + min: + cpu: 10 + memory: 20Mi +``` + +then create child quota: +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-example + namespace: default + labels: + quota.scheduling.koordinator.sh/is-parent: false + quota.scheduling.koordinator.sh/parent: "quota-parent-example" +spec: + max: + cpu: 40 + memory: 40Gi + test: 200 + min: + cpu: 10 + memory: 20Mi +``` + +```bash +$ kubectl apply -f quota-example.yaml + Error from server: error when creating "elastic-quota-example_xb.yaml": admission webhook "vquota.kb.io" denied the request: quota-example has parentName quota-parent-example but the parentQuotaInfo's IsParent is false +``` + +5.A quota group can't be converted on the attribute of parent group\child group. + +first create parent quota: +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-parent-example + namespace: default + labels: + quota.scheduling.koordinator.sh/is-parent: true +spec: + max: + cpu: 40 + memory: 40Gi + min: + cpu: 10 + memory: 20Mi +``` + +then modify `quota.scheduling.koordinator.sh/is-parent:false`: +```bash +$ kubectl apply -f quota-parent-example.yaml + elastic-quota-example_xb_parent.yaml": admission webhook "vquota.kb.io" denied the request: IsParent is forbidden modify now, quotaName:quota-parent-example +``` + +### used > runtime revoke +We offer a config to control if quota's used > runtime, we allow the scheduler to delete over-resource-used pod from +low priority to high priority. you should follow the below config of `koord-scheduler-config.yaml` in helm. + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: koord-scheduler-config + namespace: {{ .Values.installation.namespace }} +data: + koord-scheduler-config: | + apiVersion: kubescheduler.config.k8s.io/v1beta2 + kind: KubeSchedulerConfiguration + leaderElection: + leaderElect: true + resourceLock: leases + resourceName: koord-scheduler + resourceNamespace: {{ .Values.installation.namespace }} + profiles: + - pluginConfig: + - name: ElasticQuota + args: + apiVersion: kubescheduler.config.k8s.io/v1beta2 + kind: ElasticQuotaArgs + quotaGroupNamespace: {{ .Values.installation.namespace }} + monitorAllQuotas: true + revokePodInterval: 60s + delayEvictTime: 300s + plugins: + queueSort: + disabled: + - name: "*" + enabled: + - name: Coscheduling + preFilter: + enabled: + - name: NodeNUMAResource + - name: DeviceShare + - name: Reservation + - name: Coscheduling + - name: ElasticQuota + filter: + ... +``` +- `monitorAllQuotas` enable "used > runtime revoke" logic. Default is false. +- `revokePodInterval` check loop time interval. +- `delayEvictTime` when "used > runtime" continues over `delayEvictTime` will really trigger eviction. + +To let scheduler can really delete the pod successfully, you should config the `rbac/koord-scheduler.yaml` as below in helm. 
+ +```yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: koord-scheduler-role +rules: +{{- if semverCompare "<= 1.20-0" .Capabilities.KubeVersion.Version }} +- apiGroups: + - "" + resources: + - namespaces + verbs: + - get + - list + - watch +{{- end }} +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - update +- apiGroups: + - "" + resources: + - pods + verbs: + - patch + - update + - delete +- apiGroups: + ... +``` diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/performance-collector.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/performance-collector.md new file mode 100644 index 000000000..846d2dd6c --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/performance-collector.md @@ -0,0 +1,188 @@ +# Performance Collector + +## 背景 + +在真实的生产环境下,单机的运行时状态是一个“混沌系统”,资源竞争产生的应用干扰无法绝对避免。Koordinator正在建立干扰检测与优化的能力,通过提取应用运行状态的指标,进行实时的分析和检测,在发现干扰后对目标应用和干扰源采取更具针对性的策略。 +Koordinator已经实现了一系列`Performance Collector`,在单机侧采集与应用运行状态高相关性的底层指标,并通过`Prometheus`暴露出来,为干扰检测能力和集群应用调度提供支持。 + +## 使用方法 + +### 准备条件 + +- Kubernetes >= 1.18 + +- Koordinator >= 1.0 + +- 若您使用CPI Collector,请确保您的机器支持获取Cycles、Instructions这两个Kernel PMU(Performance Monitoring Unit)事件。 + + > 使用如下命令检查是否支持 + + ```shell + $ perf list + List of pre-defined events (to be used in -e): + + branch-instructions OR branches [Hardware event] + branch-misses [Hardware event] + bus-cycles [Hardware event] + ... + + cpu-cycles OR cpu/cpu-cycles/ [Kernel PMU event] + ... + instructions OR cpu/instructions/ [Kernel PMU event] + ``` + +- 若您使用PSI Collector,您需要在Anolis OS中开启PSI功能,您可以参考[文档](https://help.aliyun.com/document_detail/155464.html)获取开启方法。 + +### 安装 + +请确保Koordinator的相关组件已被正确安装于您的集群中。您可以参考文档[Installation](https://koordinator.sh/zh-Hans/docs/installation)来获取相关的安装方法。 + +### feature-gates + +Performance Collector由多个feature-gate进行控制,Koordinator目前提供一下几个指标采集器: + +- `CPICollector`:用于控制CPI指标采集器。CPI:Cycles Per Instruction。 +- `PSICollector`:用于控制PSI指标采集器。PSI:Pressure Stall Information。 + +### 配置 + +Performance Collector目前是默认关闭的。您可以通过修改Koordlet的feature-gates项来使用它,此项修改不会影响其他feature-gate + +```shell +kubectl edit ds koordlet -n koordinator-system +``` + +```shell +... +spec: + ... + spec: + containers: + - args: + ... + # modify here + # - -feature-gates=BECPUEvict=true,BEMemoryEvict=true,CgroupReconcile=true,Accelerators=true + - -feature-gates=BECPUEvict=true,BEMemoryEvict=true,CgroupReconcile=true,Accelerators=true,CPICollector=true,PSICollector=true + ... 
+``` + +## 开销对比 + +Koordinator Performance Collector作为干扰检测的重要工具,其核心目标之一为在低成本、无自身干扰引入的情况下采集相关指标。下文展示了开启Performance Collector前后Koordinator引入的系统开销。用户可参考此测试结果使用Performance Collector功能。 + +### 测试环境 + +- 阿里云容器服务Kubernetes版(ACK)托管版集群: + - Kubernetes版本:1.24.6-aliyun.1 + - 容器运行时:containerd 1.5.13 + - 节点规格:ecs.ebmg6.26xlarge,104 vCPU 384 GiB,操作系统Alibaba Cloud Linux 2.1903 +- 节点负载: + - 测试Pod镜像:nginx:1.14.2 + - 单节点Pod数量:100 test Pod + 50 system Pod + - 单节点容器数量:150 + - 系统CPU usage水位:25%左右,使用lookbusy-1.4工具在每个CPU核上生产压力 +- 其他条件: + - 100个nginx Pod由Linux cronjob管理,每五分钟删除一次。Deployment控制器将会随之进行重建。 + - CPI Collector的运行时间窗口为每60秒一次,每次持续时长10秒。 + - PSI Collector每10秒采集一次。 + - 测试在Performance Collector开启前后均运行一小时。 + +### 测试结论 + +#### Case 1:Koordlet容器运行Performance Collector前后开销对比 + +Performance Collector运行于Koordinator的Koordlet组件,现将其对该组件的开销对比如下: + +- 总体开销无明显上升: + + | 关键指标 | 开启前 | 开启后 | + | :--------------: | :------: | :--------: | + | RSS Memory usage | 341MiB | 366MiB | + | CPU usage | 0.5 core | 0.6 core | + | 网络I/O | - | 无明显变化 | + +- 性能开销原因分析: + - 新增Container维度的CPI、Container和Pod维度的PSI数据表 + - 每cgroup唯一的采集器goroutine带来的性能消耗 + - Prometheus上报数据仪表盘带来的少量内存消耗 + +#### Case 2:运行Performance Collector后节点开销对比 + +Performance Collector使用了perf_event_open(2)系统调用,并开启了节点上的PSI功能,现将其对节点影响对比如下: + +- 无明显开销增长: + + | 关键指标 | 开启前 | 开启后 | + | :-------------: | :----: | :----: | + | 内核态CPU使用率 | 0.94% | 0.96% | + | 用户态CPU使用率 | 24.51% | 25.19% | + +- 性能开销原因分析: + - perf_event_open(2)的使用 + - PSI功能的开启 + +## 实例 + +1. 打开想要使用的Performance Collector: +```shell +helm install koordinator https://... --set featureGates="CPICollector=true,PSICollector=true" +``` + +2. 使用如下flag配置指标采集器的时间窗口、采集间隔等: + + | flag名称 | 默认值 | 含义 | + | :-----------------------------: | :----: | :-----------------------------: | + | -cpi-collector-interval-seconds | 60 | CPI指标采集的时间间隔,单位为秒 | + | -collect-cpi-timewindow-seconds | 10 | CPI指标采集的时间窗口,单位为秒 | + | -psi-collector-interval-seconds | 10 | PSI指标采集的时间间隔,单位为秒 | + +3. 您可以在Prometheus指标暴露端口(默认为9316)处观察到采集到的指标,查询 API为`/metrics`,CPI指标以*cycles*和*instructions*两条记录分开展示: +```shell +$ curl http://localhost:9316/metrics + +# HELP koordlet_container_cpi Container cpi collected by koordlet +# TYPE koordlet_container_cpi gauge +koordlet_container_cpi{container_id="containerd://498de02ddd3ad7c901b3c80f96c57db5b3ed9a817dbfab9d16b18be7e7d2d047",container_name="koordlet",cpi_field="cycles",node="your-node-name",pod_name="koordlet-x8g2j",pod_namespace="koordinator-system",pod_uid="3440fb9c-423b-48e9-8850-06a6c50f633d"} 2.228107503e+09 +koordlet_container_cpi{container_id="containerd://498de02ddd3ad7c901b3c80f96c57db5b3ed9a817dbfab9d16b18be7e7d2d047",container_name="koordlet",cpi_field="instructions",node="your-node-name",pod_name="koordlet-x8g2j",pod_namespace="koordinator-system",pod_uid="3440fb9c-423b-48e9-8850-06a6c50f633d"} 4.1456092e+09 +``` + +4. 
同时,我们提供ServiceMonitor用于暴露Koordlet采集的指标: + + ```yaml + apiVersion: v1 + kind: Service + metadata: + labels: + koord-app: koordlet + name: koordlet + namespace: koordinator-system + spec: + clusterIP: None + ports: + - name: koordlet-service + port: 9316 + targetPort: 9316 + selector: + koord-app: koordlet + --- + apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + labels: + koord-app: koordlet + name: koordlet + namespace: koordinator-system + spec: + endpoints: + - interval: 30s + port: koordlet-service + scheme: http + jobLabel: koord-app + selector: + matchLabels: + koord-app: koordlet + ``` + + 您可以在部署后于Prometheus的Targets中找到并使用: + + ![koordlet-servicemonitor-prometheus](/img/koordlet-servicemonitor-prometheus.png) diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/pod-migration-job.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/pod-migration-job.md new file mode 100644 index 000000000..37c9d5d21 --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/pod-migration-job.md @@ -0,0 +1,254 @@ +# PodMigrationJob + +Koordinator定义了一个基于 CRD 的 Pod 迁移 API,称为 `PodMigrationJob`,通过此 API,重调度器(descheduler)或其他自动故障恢复组件可以更安全地将 Pod 驱逐或删除。 + +## 介绍 + +迁移 Pods 是许多组件(如descheduler)依赖的重要能力,可用于优化调度或帮助解决工作负载运行时质量问题。我们认为,Pod 迁移是一个复杂的过程,涉及诸如审计(auditing)、资源分配和应用程序启动等步骤,并与应用程序升级、伸缩等场景以及集群管理员的资源操作和维护操作混合在一起。因此,如何管理此过程的稳定性风险,以确保应用程序不会因为 Pod 迁移而失败,是必须解决的关键的问题。 + +基于 PodMigrationJob CRD 的最终状态导向迁移能力,我们可以跟踪迁移过程中每个过程的状态,感知应用程序升级和扩展等场景,以确保工作负载的稳定性。 + +## 设置 + +### 前置条件 + +- Kubernetes >= 1.18 +- Koordinator >= 0.6 + +### Installation + +请确保Koordinator组件已正确安装在您的集群中。如果未安装,请参考[安装](/docs/installation). + +### Configurations + +PodMigrationJob 已默认启用。您可以在koord-descheduler配置中无需任何修改即可使用它。 + +## 使用 PodMigrationJob + +### 快速开始 + +1. 使用下面的YAML文件创建一个名为`pod-demo`的Deployment + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: pod-demo + namespace: default +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: pod-demo + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + creationTimestamp: null + labels: + app: pod-demo + name: stress + spec: + containers: + - args: + - -c + - "1" + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: + limits: + cpu: "2" + memory: 4Gi + requests: + cpu: 200m + memory: 400Mi + restartPolicy: Always + schedulerName: koord-scheduler +``` + +```bash +$ kubectl create -f pod-demo.yaml +deployment.apps/pod-demo created +``` + +2. 检查Pod `pod-demo-0` 的调度结果 + +```bash +$ kubectl get pod -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +pod-demo-5f9b977566-c7lvk 1/1 Running 0 41s 10.17.0.9 node-0 +``` + +`pod-demo-5f9b977566-c7lvk` 被调度在节点 `node-0`上 + +3. 使用下面的YAML文件创建一个 `PodMigrationJob` 来迁移 `pod-demo-0` + +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: PodMigrationJob +metadata: + name: migrationjob-demo +spec: + paused: false + ttl: 5m + mode: ReservationFirst + podRef: + namespace: default + name: pod-demo-5f9b977566-c7lvk +status: + phase: Pending +``` + +```bash +$ kubectl create -f migrationjob-demo.yaml +podmigrationjob.scheduling.koordinator.sh/migrationjob-demo created +``` + +5. 
查看迁移状态 + +```bash +$ kubectl get podmigrationjob migrationjob-demo +NAME PHASE STATUS AGE NODE RESERVATION PODNAMESPACE POD NEWPOD TTL +migrationjob-demo Succeed Complete 37s node-1 d56659ab-ba16-47a2-821d-22d6ba49258e default pod-demo-5f9b977566-c7lvk pod-demo-5f9b977566-nxjdf 5m0s +``` + +从上述结果可以观察到: +- **PHASE** 为 `Succeed`, **STATUS** 为 `Complete`, 表明迁移成功; +- **NODE** `node-1` 表示迁移后新Pod所调度的节点; +- **RESERVATION** `d56659ab-ba16-47a2-821d-22d6ba49258e` 是在迁移期间创建的 Reservation。PodMigrationJob Controller 将在开始驱逐 Pod 之前尝试为 Reservation 创建预留资源。在成功预留资源后,将启动驱逐操作,这可以确保新 Pod 必须被驱逐,因为已有资源可用; +- **PODNAMESPACE** `default` 表示待迁移 Pod 所在的命名空间; +- **POD** `pod-demo-5f9b977566-c7lvk` 表示待迁移的 Pod; +- **NEWPOD** `pod-demo-5f9b977566-nxjdf` 表示迁移后新创建的 Pod; +- **TTL** 表示当前作业的 TTL 周期。 + +6. 查看迁移事件 + +PodMigrationJob Controller 将在迁移过程的重要步骤中创建事件,以帮助用户诊断迁移问题 + +```bash +$ kubectl describe podmigrationjob migrationjob-demo +... +Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Normal ReservationCreated 8m33s koord-descheduler Successfully create Reservation "d56659ab-ba16-47a2-821d-22d6ba49258e" + Normal ReservationScheduled 8m33s koord-descheduler Assigned Reservation "d56659ab-ba16-47a2-821d-22d6ba49258e" to node "node-1" + Normal Evicting 8m33s koord-descheduler Try to evict Pod "default/pod-demo-5f9b977566-c7lvk" + Normal EvictComplete 8m koord-descheduler Pod "default/pod-demo-5f9b977566-c7lvk" has been evicted + Normal Complete 8m koord-descheduler Bind Pod "default/pod-demo-5f9b977566-nxjdf" in Reservation "d56659ab-ba16-47a2-821d-22d6ba49258e" +``` + +### 高级配置 + +> 最新的API可以查看[`pod_migration_job_types.go`](https://github.com/koordinator-sh/koordinator/blob/main/apis/scheduling/v1alpha1/pod_migration_job_types.go). + +### 示例: 手动确认是否允许迁移 + +驱逐或迁移操作会带来稳定性风险,因此希望在启动迁移操作之前手动检查和确认没有错误,然后再启动迁移。 + +因此,在创建 PodMigrationJob 时,将 `spec.paused` 设置为 `true`,手动确认允许执行后再将 `spec.paused` 设置为 `false`。如果拒绝执行,则可以更新 `status.phase=Failed` 立即终止PodMigrationJob 的执行,或者等待 PodMigrationJob 自动过期。 + +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: PodMigrationJob +metadata: + name: migrationjob-demo +spec: + # paused indicates whether the PodMigrationJob should to work or not. + paused: true + # ttl controls the PodMigrationJob timeout duration. 
+ ttl: 5m + mode: ReservationFirst + podRef: + namespace: default + name: pod-demo-5f9b977566-c7lvk +status: + phase: Pending +``` + +### 示例: 只想驱逐 Pods, 无需预留资源 + +PodMigrationJob 提供两种迁移模式: +- `EvictDirectly` 直接驱逐 Pod,无需预留资源, +- `ReservationFirst` 先预留资源,以确保在开始驱逐之前可以分配资源。 + +如果你只想驱逐 Pod,只需将 `spec.mode` 设置为 `EvictDirectly`。 + +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: PodMigrationJob +metadata: + name: migrationjob-demo +spec: + paused: false + ttl: 5m + mode: EvictDirectly + podRef: + namespace: default + name: pod-demo-5f9b977566-c7lvk +status: + phase: Pending +``` + +### 示例: 在迁移中使用预留资源 + +在某些情况下,首先预留资源,然后在成功后创建一个 PodMigrationJob,以重复使用 PodMigrationJob Controller 提供的仲裁机制(在v0.7中实现)以确保工作负载的稳定性。 + +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: PodMigrationJob +metadata: + name: migrationjob-demo +spec: + paused: false + ttl: 5m + mode: ReservationFirst + podRef: + namespace: default + name: pod-demo-5f9b977566-c7lvk + reservationOptions: + # the reservation-0 created before creating PodMigrationJob + reservationRef: + name: reservation-0 +status: + phase: Pending +``` + +### 示例: 优雅驱逐 Pods + +PodMigrationJob 支持 Pod 的优雅驱逐。 + +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: PodMigrationJob +metadata: + name: migrationjob-demo +spec: + paused: true + ttl: 5m + mode: ReservationFirst + podRef: + namespace: default + name: pod-demo-5f9b977566-c7lvk + deleteOptions: + # The duration in seconds before the object should be deleted. Value must be non-negative integer. + # The value zero indicates delete immediately. If this value is nil, the default grace period for the + # specified type will be used. + # Defaults to a per object value if not specified. zero means delete immediately. + gracePeriodSeconds: 60 +status: + phase: Pending +``` + + +### 已知问题 +- 当前不支持[Arbitration mechanism](https://github.com/koordinator-sh/koordinator/blob/main/docs/proposals/scheduling/20220701-pod-migration-job.md#filter-podmigrationjob),v0.6版本仅实现了基于资源预留的迁移能力。 +- 目前不支持[Basic Migration API](https://github.com/koordinator-sh/koordinator/blob/main/docs/proposals/scheduling/20220701-pod-migration-job.md#basic-migration-api) 。 diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/resource-reservation.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/resource-reservation.md new file mode 100644 index 000000000..403ad20fa --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/resource-reservation.md @@ -0,0 +1,443 @@ +# 资源预留 + +资源预留是koord-scheduler的一种为某些特定Pod或负载预留节点资源的能力。 + +## 介绍 + +Pod是kubernetes节点资源分配的基础载体,他根据业务逻辑绑定对应的资源需求。但是我们可能分为一些还没创建的特定Pod和负载分配资源,例如: + +1. 抢占:已经存在的抢占规则不能保证只有正在抢占中的Pod才能分配抢占的资源,我们期望调度器能锁定资源,防止这些资源被有相同或更高优先级的其他Pod抢占。 +2. 重调度:在重调度场景下,最好能保证在Pod被重调度之前保留足够的资源。否则,被重调度的Pod可能再也没法运行,然后对应的应用可能就会崩溃。 +3. 水平扩容:为了能更精准地进行水平扩容,我们希望能为扩容的Pod副本分配节点资源。 +4. 资源预分配:即使当前的资源还不可用,我们可能想为将来的资源需求提前预留节点资源。 + +为了增强kubernetes的资源调度能力,koord-scheduler提供了一个名字叫`Reservation`的调度API,允许我们为一些当前还未创建的特定的Pod和负载,提前预留节点资源。 + +![image](/img/resource-reservation.svg) + +更多信息,请看 [设计文档:资源预留](../designs/resource-reservation)。 + +## 设置 + +### 前提 + +- Kubernetes >= 1.18 +- Koordinator >= 0.6 + +### 安装步骤 + +请确保Koordinator的组件已经在你的集群中正确安装,如果还未正确安装,请参考[安装说明](/docs/installation)。 + +### 配置 + +资源预留功能默认*启用*,你无需对koord-scheduler配置做任何修改,即可使用。 + +## 使用指南 + +### 快速上手 + +1. 
使用如下yaml文件预留资源:`reservation-demo`。 + +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: Reservation +metadata: + name: reservation-demo +spec: + template: # set resource requirements + namespace: default + spec: + containers: + - args: + - '-c' + - '1' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: # reserve 500m cpu and 800Mi memory + requests: + cpu: 500m + memory: 800Mi + schedulerName: koord-scheduler # use koord-scheduler + owners: # set the owner specifications + - object: # owner pods whose name is `default/pod-demo-0` + name: pod-demo-0 + namespace: default + ttl: 1h # set the TTL, the reservation will get expired 1 hour later +``` + +```bash +$ kubectl create -f reservation-demo.yaml +reservation.scheduling.koordinator.sh/reservation-demo created +``` + +2. 跟踪reservation-demo的状态,直到它变成可用状态。 + +```bash +$ kubectl get reservation reservation-demo -o wide +NAME PHASE AGE NODE TTL EXPIRES +reservation-demo Available 88s node-0 1h +``` + +3. 使用如下YAML文件部署一个Pod:`Pod-demo-0`。 + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-demo-0 # match the owner spec of `reservation-demo` +spec: + containers: + - args: + - '-c' + - '1' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: + limits: + cpu: '1' + memory: 1Gi + requests: + cpu: 200m + memory: 400Mi + restartPolicy: Always + schedulerName: koord-scheduler # use koord-scheduler +``` + +```bash +$ kubectl create -f pod-demo-0.yaml +pod/pod-demo-0 created +``` + +4. 检查`Pod-demo-0`的调度状态。 + +```bash +$ kubectl get pod pod-demo-0 -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +pod-demo-0 1/1 Running 0 32s 10.17.0.123 node-0 +``` + +`Pod-demo-0`将会和`reservation-demo`被调度到同一个节点。 + +5. 检查`reservation-demo`的状态。 + +```bash +$ kubectl get reservation reservation-demo -oyaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: Reservation +metadata: + name: reservation-demo + creationTimestamp: "YYYY-MM-DDT05:24:58Z" + uid: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + ... +spec: + owners: + - object: + name: pod-demo-0 + namespace: default + template: + spec: + containers: + - args: + - -c + - "1" + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: + requests: + cpu: 500m + memory: 800Mi + schedulerName: koord-scheduler + ttl: 1h +status: + allocatable: # total reserved + cpu: 500m + memory: 800Mi + allocated: # current allocated + cpu: 200m + memory: 400Mi + conditions: + - lastProbeTime: "YYYY-MM-DDT05:24:58Z" + lastTransitionTime: "YYYY-MM-DDT05:24:58Z" + reason: Scheduled + status: "True" + type: Scheduled + - lastProbeTime: "YYYY-MM-DDT05:24:58Z" + lastTransitionTime: "YYYY-MM-DDT05:24:58Z" + reason: Available + status: "True" + type: Ready + currentOwners: + - name: pod-demo-0 + namespace: default + uid: yyyyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy + nodeName: node-0 + phase: Available +``` + +现在我们可以看到`reservation-demo`预留了500m cpu和 800Mi内存, `Pod-demo-0`从预留的资源中分配了200m cpu and 400Mi内存。 + +6. 
清理`reservation-demo`的预留资源。 + +```bash +$ kubectl delete reservation reservation-demo +reservation.scheduling.koordinator.sh "reservation-demo" deleted +$ kubectl get pod pod-demo-0 +NAME READY STATUS RESTARTS AGE +pod-demo-0 1/1 Running 0 110s +``` + +在预留资源被删除后,`Pod-demo-0`依然正常运行。 + +### 高级特性 + +> 最新的API可以在这里查看: [`reservation_types`](https://github.com/koordinator-sh/koordinator/blob/main/apis/scheduling/v1alpha1/reservation_types.go)。 + +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: Reservation +metadata: + name: reservation-demo +spec: + # pod template (required): Reserve resources and play pod/node affinities according to the template. + # The resource requirements of the pod indicates the resource requirements of the reservation + template: + namespace: default + spec: + containers: + - args: + - '-c' + - '1' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: + requests: + cpu: 500m + memory: 800Mi + # scheduler name (required): use koord-scheduler to schedule the reservation + schedulerName: koord-scheduler + # owner spec (required): Specify what kinds of pods can allocate resources of this reservation. + # Currently support three kinds of owner specifications: + # - object: specify the name, namespace, uid of the owner pods + # - controller: specify the owner reference of the owner pods, e.g. name, namespace(extended by koordinator), uid, kind + # - labelSelector: specify the matching labels are matching expressions of the owner pods + owners: + - object: + name: pod-demo-0 + namespace: default + - labelSelector: + matchLabels: + app: app-demo + # TTL (optional): Time-To-Live duration of the reservation. The reservation will get expired after the TTL period. + # If not set, use `24h` as default. + ttl: 1h + # Expires (optional): Expired timestamp when the reservation is expected to expire. + # If both `expires` and `ttl` are set, `expires` is checked first. + expires: "YYYY-MM-DDTHH:MM:SSZ" +``` + + + +### 案例:多个属主在同一个节点预留资源 + +1. 检查每个节点的可分配资源。 + +```bash +$ kubectl get node -o custom-columns=NAME:.metadata.name,CPU:.status.allocatable.cpu,MEMORY:.status.allocatable.memory +NAME CPU MEMORY +node-0 7800m 28625036Ki +node-1 7800m 28629692Ki +... +$ kubectl describe node node-1 | grep -A 8 "Allocated resources" + Allocated resources: + (Total limits may be over 100 percent, i.e., overcommitted.) + Resource Requests Limits + -------- -------- ------ + cpu 780m (10%) 7722m (99%) + memory 1216Mi (4%) 14044Mi (50%) + ephemeral-storage 0 (0%) 0 (0%) + hugepages-1Gi 0 (0%) 0 (0%) + hugepages-2Mi 0 (0%) 0 (0%) +``` + +如上图,`node-1`节点还保留7.0 cpu and 26Gi memory未分配。 + +2. 
用如下YAML文件预留资源:`reservation-demo-big`。 + +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: Reservation +metadata: + name: reservation-demo-big +spec: + template: + namespace: default + spec: + containers: + - args: + - '-c' + - '1' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: # reserve 6 cpu and 20Gi memory + requests: + cpu: 6 + memory: 20Gi + nodeName: node-1 # set the expected node name to schedule at + schedulerName: koord-scheduler + owners: # set multiple owners + - object: # owner pods whose name is `default/pod-demo-0` + name: pod-demo-1 + namespace: default + - labelSelector: # owner pods who have label `app=app-demo` can allocate the reserved resources + matchLabels: + app: app-demo + ttl: 1h +``` + +```bash +$ kubectl create -f reservation-demo-big.yaml +reservation.scheduling.koordinator.sh/reservation-demo-big created +``` + +3. 跟踪`reservation-demo-big`的状态,直到他变成可用状态。 + +```bash +$ kubectl get reservation reservation-demo-big -o wide +NAME PHASE AGE NODE TTL EXPIRES +reservation-demo-big Available 37s node-1 1h +``` + +`reservation-demo-big`将被调度到Pod模板中设置的nodeName属性节点:`node-1`。 + +4. 用如下YAML文件创建一次部署:`app-demo`。 + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: app-demo +spec: + replicas: 2 + selector: + matchLabels: + app: app-demo + template: + metadata: + name: stress + labels: + app: app-demo # match the owner spec of `reservation-demo-big` + spec: + schedulerName: koord-scheduler # use koord-scheduler + containers: + - name: stress + image: polinux/stress + args: + - '-c' + - '1' + command: + - stress + resources: + requests: + cpu: 2 + memory: 10Gi + limits: + cpu: 4 + memory: 20Gi +``` + +```bash +$ kubectl create -f app-demo.yaml +deployment.apps/app-demo created +``` + +5. 检查`app-demo`的Pod调度结果. + +```bash +k get pod -l app=app-demo -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +app-demo-798c66db46-ctnbr 1/1 Running 0 2m 10.17.0.124 node-1 +app-demo-798c66db46-pzphc 1/1 Running 0 2m 10.17.0.125 node-1 +``` + +`app-demo`的Pod将会被调度到`reservation-demo-big`所在的节点。 + +6. 检查`reservation-demo-big`的状态。 + +```bash +$ kubectl get reservation reservation-demo-big -oyaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: Reservation +metadata: + name: reservation-demo-big + creationTimestamp: "YYYY-MM-DDT06:28:16Z" + uid: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + ... 
+spec: + owners: + - object: + name: pod-demo-0 + namespace: default + template: + spec: + containers: + - args: + - -c + - "1" + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: + requests: + cpu: 500m + memory: 800Mi + schedulerName: koord-scheduler + ttl: 1h +status: + allocatable: + cpu: 6 + memory: 20Gi + allocated: + cpu: 4 + memory: 20Gi + conditions: + - lastProbeTime: "YYYY-MM-DDT06:28:17Z" + lastTransitionTime: "YYYY-MM-DDT06:28:17Z" + reason: Scheduled + status: "True" + type: Scheduled + - lastProbeTime: "YYYY-MM-DDT06:28:17Z" + lastTransitionTime: "YYYY-MM-DDT06:28:17Z" + reason: Available + status: "True" + type: Ready + currentOwners: + - name: app-demo-798c66db46-ctnbr + namespace: default + uid: yyyyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy + - name: app-demo-798c66db46-pzphc + namespace: default + uid: zzzzzzzz-zzzz-zzzz-zzzzzzzzzzzz + nodeName: node-1 + phase: Available +``` + +现在我们能看到`reservation-demo-big`预留了6 cpu和20Gi内存,`app-demo`从预留的资源中分配了4 cpu and 20Gi内存,预留资源的分配不会增加节点资源的请求容量,否则`node-1`的请求资源总容量将会超过可分配的资源容量。而且当有足够的未分配的预留资源时,这些预留资源可以被同时分配给多个属主。 diff --git a/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/slo-config.md b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/slo-config.md new file mode 100644 index 000000000..c565856ef --- /dev/null +++ b/i18n/zh-Hans/docusaurus-plugin-content-docs/version-v1.4/user-manuals/slo-config.md @@ -0,0 +1,406 @@ +# SLO 配置 + +## 简介 + +Koordinator 使用一个 ConfigMap 管理 SLO 配置。该 ConfigMap 被 slo-controller 所使用,它的名字和命名空间可以在 koord-manager 的启 +动参数中指定(默认为 `koordinator-system/slo-controller-config`)。它分别包含了以下键值: + +- `colocation-config`:混部配置。例如,是否开启混部 Batch 资源,混部水位线。 +- `resource-threshold-config`:基于阈值的压制/驱逐策略的配置。例如,CPU 压制的阈值,内存驱逐的阈值。 +- `resource-qos-config`:QoS 特性的配置。例如,BE pods 的 Group Identity,LS pods 的内存 QoS,BE pods 的末级缓存划分。 +- `cpu-burst-config`:CPU Burst 特性的配置。例如,pod 的最大 burst 比例。 +- `system-config`:系统设定的配置。例如,全局内存最低水位线系数 `min_free_kbytes`。 + +### 配置层级 + +每个配置定义为集群级别和节点级别的形式。 + +例如, + +```go +type ColocationCfg struct { +ColocationStrategy `json:",inline"` +NodeConfigs []NodeColocationCfg `json:"nodeConfigs,omitempty"` +} + +type ResourceQOSCfg struct { +ClusterStrategy *slov1alpha1.ResourceQOSStrategy `json:"clusterStrategy,omitempty"` +NodeStrategies []NodeResourceQOSStrategy `json:"nodeStrategies,omitempty"` +} +``` + +集群级别配置用于设置全局配置,而节点级别则供用户调整部分节点的配置,特别是灰度部署的情况。 + +请注意,大部分可配置的字段都在组件内部(koordlet、koord-manager)有默认值,所以通常仅需要编辑变更的参数。 + +### NodeSLO + +SLO 配置的 data 字段会被 koord-manager 解析。Koord-manager 会检查配置数据是否合法,然后用解析后的配置更新到每个节点的 NodeSLO 对象中。 +如果解析失败,koord-manager 会在 ConfigMap 对象上记录 Events,以警示 unmarshal 错误。对于 agent 组件 koordlet,它会 watch NodeSLO +的 Spec,并对节点的 QoS 特性进行调谐。 + +```yaml +apiVersion: slo.koordinator.sh/v1alpha1 +kind: NodeSLO +metadata: + name: test-node +spec: + cpuBurstStrategy: {} + extensions: {} + resourceQOSStrategy: {} + systemStrategy: {} + # parsed from the `resource-threshold-config` data + resourceUsedThresholdWithBE: + cpuSuppressPolicy: cpuset + cpuSuppressThresholdPercent: 65 + enable: true + memoryEvictThresholdPercent: 70 +``` + +## 配置 + +> 参考版本:Koordinator v1.2 + +SLO 配置的模板如下: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: koordinator-system +data: + # colocation-config is the configuration for colocation. + # Related features: Dynamic resource over-commitment, Load-aware scheduling, Load-aware descheduling. + # - enable: whether to enable the colocation. 
If false, the reclaimed resources of the node allocatable (e.g. `kubernetes.io/batch-cpu`) will be removed. + # - metricAggregateDurationSeconds: the aggregated duration of node metrics reporting. + # - metricReportIntervalSeconds: the reporting interval of the node metrics. + # - metricAggregatePolicy: policies of reporting node metrics in different durations. + # - cpuReclaimThresholdPercent: the reclaim threshold for calculating the reclaimed cpu resource. Basically, the reclaimed resource cannot reclaim the unused resources which are exceeding the threshold. + # - memoryReclaimThresholdPercent: the reclaim threshold for calculating the reclaimed memory resource. Basically, the reclaimed resource cannot reclaim the unused resources which are exceeding the threshold. + # - memoryCalculatePolicy: the policy for calculating the reclaimable memory resource. If set to `request`, only unallocated memory resource of high-priority pods are reclaimable, and no allocated memory can be reclaimed. + # - degradeTimeMinutes: the threshold duration to degrade the colocation for which the node metrics has not been updated. + # - updateTimeThresholdSeconds: the threshold duration to force updating the reclaimed resources with the latest calculated result. + # - resourceDiffThreshold: the threshold to update the reclaimed resources than which the calculated reclaimed resources is different from the current. + # - nodeConfigs: the node-level configurations which matches the nodes via the node selector and overrides the cluster configuration. + colocation-config: | + { + "enable": false, + "metricAggregateDurationSeconds": 300, + "metricReportIntervalSeconds": 60, + "metricAggregatePolicy": { + "durations": [ + "5m", + "10m", + "15m" + ] + }, + "cpuReclaimThresholdPercent": 60, + "memoryReclaimThresholdPercent": 65, + "memoryCalculatePolicy": "usage", + "degradeTimeMinutes": 15, + "updateTimeThresholdSeconds": 300, + "resourceDiffThreshold": 0.1, + "nodeConfigs": [ + { + "name": "anolis", + "nodeSelector": { + "matchLabels": { + "kubernetes.io/kernel": "anolis" + } + }, + "updateTimeThresholdSeconds": 360, + "resourceDiffThreshold": 0.2 + } + ] + } + # The configuration for threshold-based strategies. + # Related features: BECPUSuppress, BEMemoryEvict, BECPUEvict. + # - clusterStrategy: the cluster-level configuration. + # - nodeStrategies: the node-level configurations which matches the nodes via the node selector and overrides the cluster configuration. + # - enable: whether to enable the threshold-based strategies or not. If false, all threshold-based strategies are disabled. If set to true, CPU Suppress and Memory Evict are enabled by default. + # - cpuSuppressThresholdPercent: the node cpu utilization threshold to suppress BE pods' usage. + # - cpuSuppressPolicy: the policy of cpu suppression. If set to `cpuset`, the BE pods' `cpuset.cpus` will be reconciled when suppression. If set to `cfsQuota`, the BE pods' `cpu.cfs_quota_us` will be reconciled. + # - memoryEvictThresholdPercent: the node memory utilization threshold to evict BE pods. + # - memoryEvictLowerPercent: the node memory utilization threshold to stop the memory eviction. By default, `lowerPercent = thresholdPercent - 2`. + # - cpuEvictBESatisfactionLowerPercent: the cpu satisfaction threshold to start the cpu eviction (also require to meet the BE util threshold). + # - cpuEvictBEUsageThresholdPercent: the BE utilization (BEUsage / BERealLimit) threshold to start the cpu eviction (also require to meet the cpu satisfaction threshold). 
+ # - cpuEvictBESatisfactionUpperPercent: the cpu satisfaction threshold to stop the cpu eviction. + # - cpuEvictTimeWindowSeconds: the time window of the cpu metrics for the cpu eviction. + resource-threshold-config: | + { + "clusterStrategy": { + "enable": false, + "cpuSuppressThresholdPercent": 65, + "cpuSuppressPolicy": "cpuset", + "memoryEvictThresholdPercent": 70, + "memoryEvictLowerPercent": 65, + "cpuEvictBESatisfactionUpperPercent": 90, + "cpuEvictBESatisfactionLowerPercent": 60, + "cpuEvictBEUsageThresholdPercent": 90 + }, + "nodeStrategies": [ + { + "name": "anolis", + "nodeSelector": { + "matchLabels": { + "kubernetes.io/kernel": "anolis" + } + }, + "cpuEvictBEUsageThresholdPercent": 80 + } + ] + } + # The configuration for QoS-based features. + # Related features: CPUQoS (GroupIdentity), MemoryQoS (CgroupReconcile), ResctrlQoS. + # - clusterStrategy: the cluster-level configuration. + # - nodeStrategies: the node-level configurations which matches the nodes via the node selector and overrides the cluster configuration. + # - lsrClass/lsClass/beClass: the configuration for pods of QoS LSR/LS/BE respectively. + # - cpuQOS: the configuration of CPU QoS. + # - enable: whether to enable CPU QoS. If set to `false`, the related cgroup configs will be reset to the system default. + # - groupIdentity: the priority level of the Group Identity ([-1, 2]). `2` means the highest priority, while `-1` means the lowest priority. Anolis OS required. + # - memoryQOS: the configuration of Memory QoS. + # - enable: whether to enable Memory QoS. If set to `false`, the related cgroup configs will be reset to the system default. + # - minLimitPercent: the scale percentage for setting the `memory.min` based on the container's request. It enables the memory protection from the Linux memory reclaim. + # - lowLimitPercent: the scale percentage for setting the `memory.low` based on the container's request. It enables the memory soft protection from the Linux memory reclaim. + # - throttlingPercent: the scale percentage for setting the `memory.high` based on the container's limit. It enables the memory throttling in cgroup level. + # - wmarkRatio: the ratio of container-level asynchronous memory reclaim based on the container's limit. Anolis OS required. + # - wmarkScalePermill: the per-mill of container memory to reclaim in once asynchronous memory reclaim. Anolis OS required. + # - wmarkMinAdj: the adjustment percentage of global memory min watermark. It affects the reclaim priority when the node memory free is quite a few. Anolis OS required. + # - resctrlQOS: the configuration of Resctrl (Intel RDT) QoS. + # - enable: whether to enable Resctrl QoS. + # - catRangeStartPercent: the starting percentage of the L3 Cache way partitioning. L3 CAT required. + # - catRangeEndPercent: the ending percentage of the L3 Cache way partitioning. L3 CAT required. + # - mbaPercent: the allocation percentage of the memory bandwidth. MBA required. 
+ resource-qos-config: | + { + "clusterStrategy": { + "lsrClass": { + "cpuQOS": { + "enable": false, + "groupIdentity": 2 + }, + "memoryQOS": { + "enable": false, + "minLimitPercent": 0, + "lowLimitPercent": 0, + "throttlingPercent": 0, + "wmarkRatio": 95, + "wmarkScalePermill": 20, + "wmarkMinAdj": -25, + "priorityEnable": 0, + "priority": 0, + "oomKillGroup": 0 + }, + "resctrlQOS": { + "enable": false, + "catRangeStartPercent": 0, + "catRangeEndPercent": 100, + "mbaPercent": 100 + } + }, + "lsClass": { + "cpuQOS": { + "enable": false, + "groupIdentity": 2 + }, + "memoryQOS": { + "enable": false, + "minLimitPercent": 0, + "lowLimitPercent": 0, + "throttlingPercent": 0, + "wmarkRatio": 95, + "wmarkScalePermill": 20, + "wmarkMinAdj": -25, + "priorityEnable": 0, + "priority": 0, + "oomKillGroup": 0 + }, + "resctrlQOS": { + "enable": false, + "catRangeStartPercent": 0, + "catRangeEndPercent": 100, + "mbaPercent": 100 + } + }, + "beClass": { + "cpuQOS": { + "enable": false, + "groupIdentity": -1 + }, + "memoryQOS": { + "enable": false, + "minLimitPercent": 0, + "lowLimitPercent": 0, + "throttlingPercent": 0, + "wmarkRatio": 95, + "wmarkScalePermill": 20, + "wmarkMinAdj": 50, + "priorityEnable": 0, + "priority": 0, + "oomKillGroup": 0 + }, + "resctrlQOS": { + "enable": false, + "catRangeStartPercent": 0, + "catRangeEndPercent": 30, + "mbaPercent": 100 + } + } + }, + "nodeStrategies": [ + { + "name": "anolis", + "nodeSelector": { + "matchLabels": { + "kubernetes.io/kernel": "anolis" + } + }, + "beClass": { + "memoryQOS": { + "wmarkRatio": 90 + } + } + } + ] + } + # The configuration for the CPU Burst. + # Related features: CPUBurst. + # - clusterStrategy: the cluster-level configuration. + # - nodeStrategies: the node-level configurations which matches the nodes via the node selector and overrides the cluster configuration. + # - policy: the policy of CPU Burst. If set to `none`, the CPU Burst is disabled. If set to `auto`, the CPU Burst is fully enabled. If set to `cpuBurstOnly`, only the Linux CFS Burst feature is enabled. + # - cpuBurstPercent: the percentage of Linux CFS Burst. It affects the value of `cpu.cfs_burst_us` of pod/container cgroups. It specifies the percentage to which the CPU limit can be increased by CPU Burst. + # - cfsQuotaBurstPercent: the percentage of cfs quota burst. It affects the scaled ratio of `cpu.cfs_quota_us` of pod/container cgroups. It specifies the maximum percentage to which the value of cfs_quota in the cgroup parameters can be increased. + # - cfsQuotaBurstPeriodSeconds: the maximum period of once cfs quota burst. It indicates that the time period in which the container can run with an increased CFS quota is unlimited. + # - sharePoolThresholdPercent: the threshold of share pool utilization. If the share pool utilization is too high, CPU Burst will be stopped and reset to avoid machine overload. + cpu-burst-config: | + { + "clusterStrategy": { + "policy": "none", + "cpuBurstPercent": 1000, + "cfsQuotaBurstPercent": 300, + "cfsQuotaBurstPeriodSeconds": -1, + "sharePoolThresholdPercent": 50 + }, + "nodeStrategies": [ + { + "name": "anolis", + "nodeSelector": { + "matchLabels": { + "kubernetes.io/kernel": "anolis" + } + }, + "policy": "cfsQuotaBurstOnly", + "cfsQuotaBurstPercent": 400 + } + ] + } + # The configuration for system-level settings. + # Related features: SystemConfig. + # - clusterStrategy: the cluster-level configuration. 
+ # - nodeStrategies: the node-level configurations which matches the nodes via the node selector and overrides the cluster configuration. + # - minFreeKbytesFactor: the factor for calculating the global minimum memory free watermark `/proc/sys/vm/min_free_kbytes`. `min_free_kbytes = minFreeKbytesFactor * nodeTotalMemory / 10000`. + # - watermarkScaleFactor: the reclaim factor `/proc/sys/vm/watermark_scale_factor` in once global memory reclaim. + # - memcgReapBackGround: whether to enable the reaper for orphan memory cgroups. + system-config: |- + { + "clusterStrategy": { + "minFreeKbytesFactor": 100, + "watermarkScaleFactor": 150, + "memcgReapBackGround": 0 + } + "nodeStrategies": [ + { + "name": "anolis", + "nodeSelector": { + "matchLabels": { + "kubernetes.io/kernel": "anolis" + } + }, + "minFreeKbytesFactor": 100, + "watermarkScaleFactor": 150 + } + ] + } +``` + +对于更多信息,请查看相关特性的用户手册和设计文档。 + +## 快速开始 + +1. 通过 ConfigMap `koordinator-system/slo-controller-config` 检查当前的 SLO 配置。 + +```bash +$ kubectl get configmap -n koordinator-system slo-controller-config -o yaml +apiVersion: v1 +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: koordinator + meta.helm.sh/release-namespace: default + labels: + app.kubernetes.io/managed-by: Helm + name: slo-controller-config + namespace: koordinator-system +data: + colocation-config: | + { + "enable": false, + "metricAggregateDurationSeconds": 300, + "metricReportIntervalSeconds": 60, + "cpuReclaimThresholdPercent": 60, + "memoryReclaimThresholdPercent": 65, + "memoryCalculatePolicy": "usage", + "degradeTimeMinutes": 15, + "updateTimeThresholdSeconds": 300, + "resourceDiffThreshold": 0.1 + } + resource-threshold-config: | + { + "clusterStrategy": { + "enable": false + } + } +``` + +2. 编辑 ConfigMap `koordinator-system/slo-controller-config` 来修改 SLO 配置。 + +```bash +$ kubectl edit configmap -n koordinator-system slo-controller-config +``` + +例如,ConfigMap 编辑如下: + +```yaml +data: + # ... + resource-threshold-config: | + { + "clusterStrategy": { + "enable": true, + "cpuSuppressThresholdPercent": 60, + "cpuSuppressPolicy": "cpuset", + "memoryEvictThresholdPercent": 60 + } + } +``` + +3. 确认 NodeSLO 是否成功下发。 + +> 注意:默认值会在 NodeSLO 中省略。 + +```bash +$ kubectl get nodeslo.slo.koordinator.sh test-node -o yaml +apiVersion: slo.koordinator.sh/v1alpha1 +kind: NodeSLO +metadata: + name: test-node +spec: + # ... + extensions: {} + resourceUsedThresholdWithBE: + cpuSuppressPolicy: cpuset + cpuSuppressThresholdPercent: 60 + enable: true + memoryEvictThresholdPercent: 60 +``` diff --git a/static/img/cpu-normalization.svg b/static/img/cpu-normalization.svg new file mode 100644 index 000000000..3a0a9b2e1 --- /dev/null +++ b/static/img/cpu-normalization.svg @@ -0,0 +1,3 @@ + + +
[cpu-normalization.svg — CPU Normalization diagram. Components: APIServer, Node Webhook, Node Resource Controller (CPU Normalization Model: <CPU Model, HT, Turbo> -> ratio), Scheduler, koordlet, kubelet, CRI & OS, NodeResourceTopology (<CPU Model, HT, Turbo>, <NUMA Node Resources>). Labeled flows: "annotate normalization ratio to Node and add up to CPU amplification ratio", "modify Node CPU allocatable according to amplification ratio", "amplify CPU request of CPUSet Pods according to amplification ratio on scheduling", "report CPU basic info", "report amplified CPU allocatable of each NUMA node according to amplification ratio", "report Node CPU allocatable", "allocate requested CPU resource for Pod", "allocate actual CPU resource for Pod according to normalization ratio", "l/w" (list/watch).]
\ No newline at end of file
diff --git a/static/img/multiquotatree.png b/static/img/multiquotatree.png
new file mode 100644
index 0000000000000000000000000000000000000000..0980c8490f7883e8616e1e0deaf5afd6993df897
GIT binary patch
literal 480735
[480735 bytes of binary PNG data omitted]
z6uz9vYxVa0hfAL^^<;DmB*cBoNZ*=PY)0Q-l4)Irlc?dwz=o3bxABq?e?|mD!-7av zgx_%-3iD{G+k+UdkGW=bfHzt+`RHqPia5E$4gVTA_G9?|zv({eWOc+OhhqBAvS^Th z%ZJNXi8}fz&&ywGf;WKl=;9vFCEaNZX2FMhd+%zeGR$|m(r{WOz4APYqX2o)Rr1VF zo8_*SOHY(PK6S9|sL@z4snR)eEHjmHp$XL?uU~jwOjG=d^)c-wLjnR1;pQU)VZXbRou`cB*WX ziK=6K?hMacKSwQJ6FA5_D};7MDOd_I`(E)47f)W#U=ID7zCZdc4Qb{u*%7TgC zoZF=nyLs2Qu%{-GfHUMEV}=_t?X~wV21WZ_CLS4CYf8z>Jy2f?1&6wA4tO%isVFdI zi{SL}=DtE&0pP;gb`|h!@AvsSCMmuP`^OU;_fA5Hdq{?;j=IDM@zL+m#M$pEO z=^naUJNZ0=cbNM1_9hSK&DVDw#^L|n%)9*Z4N<2UXo0Pn>^vJlk(H`5ueSlM=sRzh zkh2!;Pk)F4@i<6jJ2&)nZ7K{ffcGmtMRz%aWYcs-0V&WntXwu@I= zy;C|ml85K+bSyBsLo3|zx5@_jMp5V&fht;zjo{|q8G@v+Xy zn{2~MMoupOS16+7=%%TMIjw=R-L_ODhbrO2BA*}k-B!wgk6u-XCCNF<|JL5)O#NWc z-NE;=$Mo0hr7sVcBlyUw@eCpPoCkOPKMN-GTx}T`3&94&Q*5u#w zBpJegTuc4J{Xrx2LTlm^6INe$&8(ebbEh)4POb2=&H*0HWZQhfj01ZiMF)qoJ2B4^ zuRmsvrlsEe7Sp7vP?As*h{Q`mSEsYI8jV(PE`ku}@4YdG5Ei8r(or*gTC>jEGmAHL z8-*Q3-aU48@@E7?#oAxk#1-nRrgHvh%LvbWs2}ZLjqyK^8iK}M?oO%6O843AM$fRA zI7W-l=>9SHK~;hxceUx(c{T>WV@*kjDt{{USpFk@@$Id{iatKp@a@rgfdp`F8dALL z6m;>;h5f%qZ%rj)n$u`)o)>d_JX^uHE}bGB`(v{+jDr8AsidJFANwFEnrKwe(n-*N ztn!!c*eoF{eHXkeU#@bF_FlWz(^b85Tc;Q7zP+gNQG9QxbY72f9;4iz z(B`(pF9Azg_x<*aOZUdkmZ+PZn&;cmOs$KudxOI;6P3a;pP*>4P{d3EZlzQt*S4O+ zKIQQ03myB=62DY}A;*z=Gbz*~3u#H@L7B4{++*p3pe%i~3VB|7?6^_R6XPP*j0UV7 zJHU6^>FnRcfxDm^KCK9bddb9Ph!i-M7A>uytN!e-qGcqnX|A|V(ff;0yxLaP=fUwm zY;G8EqEG5w`Bp}i)Uvu$(28hP2oRk;LBL-*hl2%Be>>&T(@G`E8yj9!V(tn^+}P41K86Y1@1(^=TCPaK++j z+vO^1Z~&X(@TqbU9B|47lAL#<@!bOi>kYyF%NmjExDi!Vz8&4?$(SHv`6W*MC}c>0 z;SXW&GnS&@F_3<~w3Gj3O061%ccf2e)mnZ>aT(mPZ;rNUf=n(#4puQvvYxkpB5lro zN#JF4zJ}dQ*w7M3=iSBq2=%F3V)yt;U5ud^QL-4Eg>Mqibo4cRG*@C4M;4@jDt7(pkuro&Kmjk#D-n7RJR~m!0EXpVaia!mo6i zEgER0=H>=q%4}iOrSrqeXZI%t)EJSKbvIT@SGkx^-zz`DT?d=fW_V;*lvkbelR$TZ zDsnGy!II$$!F>>7#fh+|8FaWJU7GK8FcF)0t^G=&@s0l!)DI0VL4?i;#+#pV$h+R^ zYx>CI?ynTR*WsUo989Kd!ssPGpXxLO9Qj0u!}1BKhdsE6f|q7XZr8}vniwO^V}N3a zb$eb}R#zNrO_F#2cb~$C2Md(oHdcd3CF!7txMK*DFu4j~6P^As*A+D|2hRCf(};1K z=E!rznm2Q-P^6X{b&=yF$E_-&FSw0qVzjeRr5Je3i(BU+e4f=bq(NY3Q`gbrSNky~ z#BeQh*d5(o{USyOk?%S|P^!3({jIoT3K_G1RxpXtrZ5B7UerBK4>OpH6K23u4_45N z%z>9*aod@?2{fiMW)y&H%`B;ms=Z!KSkA=dVl8l~5z7rGL-LIuy9<+d@(s&UaTu$q zfU7Dvn$w@tYK*u63*y)^74C|eVq#jQlLL0VX|z_m;I!IQ5+~+w-m6a z2&27sT#<}d0B_e|pL6ZL-BjPtjKPGEveFq>RAjB*P=6aO(F<^+3gEg13@#U?0TZ1T7Xef#%jV_FrJwA+{vN+mi|EbvdjxG8OUUB4 zGIvLtfQgkk_ruc6{Q!vY=-J}${bOawPKjwQ8X*DY+9BQ`)T=G z#C!RDw44tQtF;?%wDSj-O}Y0ryZ4UP2EUN>9KMqFZlA8)?mUXZBsAFn_*uqIy~3^i zx8y;W?~PoM(AtD9w$E`WKot_-UIp_y8B<;t^y)#{r*A}9H=;=-{M_!l=@L%dgX7>T zqOc$1HSy7~`_xB1a1r%Z?q$|YDUqLBgv%qc)VD!t7(7U{Qg%xNx5p*-zP}?V{HYMV zf^=IhL!44Xm zCsCTcA3;iy4VEJ4_=MWRJBuOgDujfS6;pW4q4qN({|tkln;)nyhKGX6>9$eZ<&>)pc<6LVsA&0|Z4|y9-P%C#WLFV|%g)`F zu!5uIh6rOMbFDi8n1aj?qBUq<=U=>hrHB3x{A?ASi_Qd@vN|Ne$1)-SN${c|tJKuU z%8Zp09J6nA(uwtZANc0|#BlV(9h$SDwVCta)EU6jj>U9l5F@TTCqMSO6!WdMp)E7} zbc7vga0}{f-e2|5>Ur_U8+uFKD@Wi4oj##;Hv**N-h?iv4!3Zj4I{pH5obSrCNFh9 z1?xVf3KmDOoi*cpU>q`wWv^x@C+0eBl)d=r9hu zr}N-Zt-5yYx{yv+OLb3gSs3e&`yx@C}s#q!WJ*cNl8-8VVABroC%XwCA>N# z-*oqF|4AwIhgR_M>jn3*Nu9UPNF1H^RJiK!Y}9i60^`jW^A#?F+IY3XZgIi0$Wrwi zD;5$|6Z!$A{$U1tB!q`ixjUd2<0=o!U0M`}Xg>)5*?K>tZ<;4+p4#%UE+*wVj%53y z{)e}DL$$k#Sha02P)V{7zm%uo7QUa-cql@R?{7)&vVg#S7r6UhIUX!aR0z^<4m*4m zxLrn0R#9EEQcL((Wo&wdEy)PK>NVK|p^G2Eu01QfBTh8n9Se!DseL_|=*rAZ(8|@W zPcz(2mhl%hyc9mk>C^Kwnb_Vb`>{Gx{k+uqw$az)T%_-nrF`<-<56(8+(iTZ_U6Tp zf9|Fp*oY3B^iqiimx5XxTEUzG1Vj0Ix)(Klz4FZY9&V53ZP_*KJn8yw@ zauK3FIHTpm2ySyYp}DgTO*pR7N?IO&u+#sQMvB^nGBx=$HK9X9m4(fPI{w!S^oeWQ z2h$;>{W|lDCnlbUX;~fFO9ahhg~7Da6Cl<2XLnY@(;}Un2k%uQ-qsw)SeiDzGV5cb 
z%$s*=@mcDm+wm{C1>BWie`34}%8P){cKILAwGNw(wmssd`IIJQz`Szxa@yS<<;G&W`!{sC zoe6rcu!6T#&wu6k`+&%%P9MToYMiJKt`GBmW|ZX@-+K%@kJ|itIxT#rz6YrXvro90 zzVTIa<^H#pGQ;ILbpy_ha0TE^_4JMB!XfMbdQqwHs-K4E+&T;I_G}(^*X1CNoI}T%R|1R9J(0t z7kF4V54&5o>kf%yJq9;yZmV={u|YZVCAaef^$cvE8{j^`i6tBf;KsLd!A{$B*LZ8MhbTa0Cd2jqkwIJ{ld3eP2`RoI7WS*#@enO*SU$Dcq zcAhs^`1F3rv(fix`73wx)sH{1(+&?RE#hM#gR{TNm-KboN@Qr}&aMgG?>fGVJmjt42ig7b3<*KajH;*fp+t{{~#H!@x295@x| z*?4fm{;4A)1pfF{+uC?=;H->u4TV$lQHITJ` z;!Lw>!G$CzSRyCyYsIM5dlih>|Kc5_tvVgrs!z~=aNFY!Z?m=GpRZ!2AE*H-j3dX~ zxMTZAL}nAG_+wfaK7`Q z2S-3k7){63JRFM+b*Rr6b>w36I5PKoDg>E}r<=cfPwC~y&4-DvbW2BNdA00YEcnbE zNBhVOdT@mm7z~%#xy@)_VswKkOCxdn=32fj4@VrX0^8Uly$DU7W15{B0`O1wh1?5X z)qI1j#QSg;u@I$%RcWi&A`J$tQ-JsIZQt~L!^0Q16ia=g>BBS&!GLtX>PWs%laiZQ zbW26sSPDV7m8+`q6n(1OvqdO~in@LIq~ZHDReyqv25(OcONYU03qQh@n4SGJ72HqF zqa`{2dSAQn<^8lW$r-}|ZOztllr_9k`C9eFDtOEJw)5lki?TmR@^@I628ao)C3Ldy zV#O~_@K?sm!{PMh)R%K7pPOnHe8FIqt>CX><|#mnXpP2RZiO73P=$%}m)S3}D6t*f z3HMV#MUa&Hhh41qP0&izpn?8bj^%Z;F1hw&K!{H!kPctGdVTqW^=v-vtFbP<0SO;f z-4V9Ub8h9EI3819N&42R>b9&14hBar%44sa-Dy{sNElo`lJq2?W4LFrq0&bryy^p@Y+LTi#Y!XSnMt4TpLr0Rn-iuGOU0|@NeHhlQc;CU9<{b$4VqR>k z*$rv`9V=Y+Uf<;^P@A(A<>CnY0(3d&Yx3bgU))`wf8^V7Zp*cI=^-ynH~Ds>F(F$t z2{{kt^Nhxv;~YArf9<8fmWwT$f0Q<9qX0wj&SZ~?l~VbeXKe^0)=}UbW47kANz=}04MGtEy{vAu0hC;S`xF5=b4}v7mW0Tj z$y-1$vakF)?IQoQ%eV3=tikak`f=gcqd8uZPc5uU9xYOXxxcg-Mn5FoJz^~#g@*3q;+ z>Z35RfT6s!nV8h)1YF~*(+c{QF&KFrmG4tVTmMaOvTK(ZnW3MpFS%{M)w!IaZa(U3 zX%d#{4|@JOPXQq_YD5qYA(-}I8Z2;d@IJOX3}+yS3BtkD<1hi+H#)hjc)!Qzy}7W0 zxSJvV6*Ty)8?mNm7%9i(dWTAakAZc5tz15)TY?3X^-|VYBvuA|@>&u$Sv_k}a@jU( zi@|@|FWIa2#s6qJ%ebcg_lr15|(hnM(T!>mNHPp=2>SCtvYIs@Qgi`ppmKz*kfTR{d zB*Au5UuZpl^2ZiB<9Wu^tb@7yjbo`iV(Q4tLD;hSdMnB#vFiDH6l7!dHf%j9j8T)N z^~fIbuUz;|vaI0QV02+|-WTB4AFW^0L=Yc-M3#D#N* zJ~=xs%nbz9^`!>II8QiT@Vrx8G=+53;*YI`pu_RLVC;(+Y8Hr7)#!RNew+zjMy+jw zucs5rS6(8yG%~R0&`~$hEp^+RsZwzZ$yp`zW`{CY&Lxryl-1JT_dEP%KmK~xS$wH0 zZkRCG3NDm=)0`Gk0b=nRR>20{6ijlBHf@X7gMv3ytcSm~$k^zyU@SL_;-Oo+XKlY~ zf@%{v$C5clt%<;?@+yU_L37F*LSQITsPj*SnYG>QJt$a}Y ztPWPI>{GCd^fsyd1deqrzTYMgYQA?_oDFKY*|`Dx9OM-!ldA`z(uqpNUTFZ_Q0wilf*{HD zaaDENwl_|77R4ijz9Yi&zozp7`xDd$6BH$k)?p|`ihwDkD?ERy=^HL(l^3&HJM2my zppVltyH--{sk2F|q--Z~$YVVH16oPgh3jK*+MD&v!nid;T? zwR9=h58VIvo%+NO!kv`MmfvPMbwBdfG9%6(TyFo~_3uG*7{~^e&ncusGfC`c?)|dw z?NR&NM{G}Jd&xH!jvY^atjVW{l{lY_uzTM}-db%vk(;h(TGQ~XQu05nlIDaN#5T>E*8$DL z`GW~_%v2b*bxCf(k?tDl7W4Jji>dZtbcleQmDZl^i1s6`hOhe0b~DO{#T=KUQTUo( z`s1az64L@TOX%2B!A{OzrNGl*DPWAPX_mS!Nzp4#xuRcAB#k{= zV_y^MUwm8#vwp_v&ZVR17IsWkb!0f#9(^Lxq?Co*HF3#3x?v1ww7 zS1ae6f&=7htr*}0IKZRsbEHebam=bf8#~U^cRcBx@MNR4p*zq})2d$Xwt9 zZ(r4>@g{9%LG#u`S{y}tOzsQq5%8WKBV5ctaACYDa<%IDqkLrr5vYUIcs62R=ti`sV%LMvYvLy~nyOw#{w{QL&9n+57EQ6-ShJ-kNKW-shx z_o=7njx~<}J1F3~@kk=JfQul{K+&dSg4U9)mH4+Eg~K2m$H)k-TK*JqQpbMvC-|~u z=;Sux)N6k#iH4-m{~|O)^Oi|UjX8gq`~t~nUY4gi7ZA-`8j!INLWB)gk=%?iJQ$hU zR~P`vDtxICOx*_`eP$H_X8*X%3tlvk~jx1D?IFF4HsS zci$=Hg=DEVzOZ~uYRQLD?f&q+jcV|dwL9$HKFce*tpzcjF^?mMJRV2~^kRBD)@1aGdDU&yd22J^j}! 
z|0A>-bp&4aY?=JyV3Ak?RD3K?(t8$woFjg$JigK*#CG|yFw#Qvi?r`?C#6Y^)it^7 zf}YsIaKv5@`#a*Bs%XER%H70$L^9c%w^Fa&N<`zxWnEzSZ%=zHkLOCz;qkj1>HMMU z1ovu(O#{%J$J+J1uZCI}?QdeFgiN)&novLL{7q$j+?IC7UwjHtT692D<&@B_`fO$K z>}+oG*$A1U1f||B7ZvFW6%Do7!;r{FQ5vn%}oJ}DWlE1%Q{}+8v zDxzMC!>>h|Qk0FC6kvUWjd9u9Q6T1Ya~gQ53$!>vp0wiY25m(FAm+D z4!aB;jb#!4V(D=!bJp0n3%z*~a9TC$Q_$e6-*JwsG;x**h)GI@eTV#;g<|8*z>BSh z39+l<>F|r>Y5}PIJm0$Y5~?oR>A`fY@74gJEM}}Q;w0#(9_PP!r!UCJ{ks_hEXZvG zV{6tFust{eqZ9X-v|NiowioCuvqPMRAlF=@!nk4a1nHZz-BrkKtJKfl*M_;xREMD3 zWY(Va?KO(&9M&wLI?|%4;4x4QsdH{H2>Qds1)>UZ0eIE@%0hXPb9eY|u$T>j?2&vU ztTi7h`zVZajQ{IT;T!#ION+~6X2l_`ZUR@KSXm}Kjtr|`mT0kC#;Y}$xaAq!@sW`r8N5D!SfE(Gaft20StS{w7Rta zE~|=Z#Cf`<*OTR_NROz=K2kOoMD%ha_&uotcg8{v?do`(Xc~lClvs^wM&+oGv?d$7 zk^1W3 z3rwx+`rft3^TyM?grm9KK7ZCPqzZg}Ucd8!!nLp$W=WQ$kR*oo3f9WY=o$1}4 z*04bxfj5ylW0K*{oqZJ)0vsQl6xhQm`Qs$qUMN@p7{J}xNX&Y3=#3(53s%S2r~Tjr z8$8&^+OGe7w;#+9(HXrF;a5pi(PfAj`DFf^8nU63_5PwI`V-Pu zzQF~~wc=GS?E1kG&{#O{xH#gw;_%1YM`!20C%n=!TO(poC(Qp2d-mAGrO&rsUN8z2 zN(+3|Vug-t0t~ox&~0C(BT~{-8h6SAQB>Z#19B6m9#dVa7$wik&|epSsN144x@R^5 zyZ&R+FQ7hxOdf3aecO{Lm-(K!?Hi9Fo}z}~Awl{3>Pga`;f%3q=*-ou9@ohCLpWR@ zaNtZzy(HSgx-AI&hq(ZW8VIZusrP)C9$W+0Y(Kp|Q)P{_fB3X662C)}gl(WhbAu1s zXt?J2W+Pvy^zSKj$GYKj6uVM^fZcX5GmF7ncBSz@Db17@CwG>NFxUYV8N~V4E-nA< zij6p;_SvGch|eBH{G&wr^17UmJ&!KEqqtI$HMjTB#`A2@)`1@db}Az+d=k@*{*OLHGwd{=)H? zvq{qcUF{tl-2yrvL~+uh`#^OatY_56#faAFbqxV2OfGe1Jb7^TI=pxA5csmZ=;|Hq z8tYIIyV}0%6U;LB-)!g%-imt8JjM7qJ-u}yJ`n7Hu*u7-a!N|BQhf2n3h_ETY>tOZWFXq*ovosVJPR4Y=hMH??ewYl{$W2s-&*_BKbNSs(IKMwar| zN^qd6LnL9e4DVB*-Ch>_ffF~NSrelR>Kun$zQsS4aiWUyj;K&40=ctvMxu#@a-Oiq z@0kD&NGq@o7Vsj%@yJxU|o~p4j1ka7(4HSt! zABO`5E1a?tm<7WAz}MG&ju$J8D;WvMuv*<_soL^`sSRMOY0c*(~F$eEf znH5hHv`|P_%T^i{G)Dl$qL>JIgEL0W*g7Z6rG%r5Yba47I=cHhq%H(BJbN|@m~5+P zmJGs@G~vd0aYb=_0;+IouvsdJ8%VHDF|OKNo)2TK6$A>~>r}Zg5#OO*@3h)y2d%M0 zxY;)20gwF-u-OZT>rUKl{tx-0_pIO4HXq;wQK8;m-WwFJZWu(AfCcTLbXEqmN{-js z@PH@2^*B6FhBR!)XKxhA%pfqCXU}9_L^(DnsyMqH{f>}kGNWwPk-R)p`F_#pS^Vxh zGK?KA0=XG^P(GV3ZI|mG#A_&aC2e)Z2=yn7@15n4_HwFNOYM#8b!xyXJWD>yFacRy zR>Vm_&?difQX&^v$hvoG-qda0qi=*v!Xd#;Z%bgCTw52itURjiEfY}6+G0vKlkRp!{{{%V2q5YZB5Z-Tl^s)``Qr7MgRy1y!_hsu z;Hir4LGIWBI1*Ez1=}tji(KZAmp$Cp3@nR-U2R^^bG`Z@a@kqjsW0kgX+2p4`r|$@ zn{SnrKSW@0{po8?YRbmR1f53LE_cd1EUJVhcBmYg9He_l@Me9FI$7S&h3T~lVv>Zy z>&?Dsc)quqi#hie;ZqqQLf~&;os8d2nT_7Ovg87rZ}87kA{4m7~($F z2Eb+Z_X!__pzv)-gSE1K58<-tSC~@d9vd6gB!k0{dm*eHB=-x1$2Ev|Y2LXDn%sFh z?s#FXYddCxZVIJ@YJpk0$NpMfl;BJHGxf!%KZ6y4X;*mjM&mB+i!Znq#4tMU-i_C8 z8a+hL9yLs`sqB)7;tMP5Kk>hDWCiD2T4cCS))&gVEu>rAkF$TEWUW;6r`xR~q3I42 zZ0oZo&qE*Es9~)=ErhGtT9+7@qW8FzH^|JDb7l3%ODyLfLG({x6lF@ZAJDEr z)Tltqye+Os@?g3<5Bcs=`tOg(4Zu3-ehxwh1tq|*5a)hPSJ>;Uq4_ET6R>VfBP$qRq z-eTB;m^9?@T`XOu5HU7_>?ja7nHm@ICvZs)R5IvFTN{@~QXtr=xd&?60H-t(@O&=0 zL;9UV5kkJD(yKQ(>XA2pmv4==tYB7IZ2(QAT%(+fV|VN|-0*ztF-Tda`1M1UBJuO_ zALj~?-uyi3HEFhKOh+cS5JTy)n18aHd3-8Ubg|c4IAa^==Uyxf>gb;!nUbNNVb|x5+y7XTFF<1v9mN{C)SBsUaTG9K zEO&jeN``jV`TlISV12pKyhPyjS%I?aFaAmPyB5K8$~e4iD#=Kp{V$-#;PaE!3(JOw zHg;{(@3PGH(K;xI$u?8xabrY zuBw>$3&>ZmI|&&>l>~Lls8$yKU3dA)Acmd^sp9!ZDsh)(q`U z?T5aARNdme@A2IBAng?Ohv7A3nW+^uu1n9~BVe(dZJpsF&)WEzMl3l~utHBFDD0x? 
z`EldB78oQ6ANmEv6Ee_P+)qvcHxBR31aVA9aJ+tWX%z#2F#NzH4;#A07}rLF$T!3{ zQpv)hBJnXVOP|046aIl4(2nr7^Pj7?yC9a1j&-+EPuSj{wwJG-Tf}8Q6?MowFWD6<1nJ+ z(*xN{;s_1Dk8I?-Y)vh?TA;4o@ddENMq;Qbn2<{Y#dxPzu*@!m(==J@+mn6|1PM5H z$Y=oG&e_yRt!&kqXdCg)mGH6ZF#D5djv4IP#tt3$H-MD#jp_0ACCK^qsED5ZFQOWe z$gS3FwS@l1QV75#91rMjzvdWR?r~YTG=jU1wX-E_kJL@VMR^*b9l(0Dueh*zO0%H5K4T zoTMDa)9dP(z(oGn;j8#J^fa%-q`$u^Wu=cOz=xcKJtIBo-!`yl0(O=LYcD1$(9al% z?3@h9H}A`7b^t5-@IX}KysxaB`!2uA5Ep6pPnE8(_MWE@rzbXnP9qfx*Ry%fi^t;3 z$g%GIbS$=SVT-@UNLX#F%T_T?p zE;hT2S*!Jx^SgE@bgdlMVo3hn(5I-w6!+lHXR%=#q3muEmc)J50LMKP8#S&8b`#W+)nk7jD6M+KA*S3_Trby9+HA z((|9ddEqr7gT(0etlxR$IbG1swh+*tMr^WVGs(ehscr3g6iWz0!t9;8g<6eUi*8%8;$XkX%#lXde2hd|onRn3R672ha5JA-Fe;7%Q zd%}hHRg@x&r3a{>-Kzg>)A4U7p!0=M(6!m?;w1veG!Bz8FyoE(#)bwDp=)(F#Oqxn zvn*mA!bc+?xJ7}TWGf`V8#Xo<{b&X8a}Auo)4^Mx2vQb32{*;f2lxCpyzh~Bxrjnw z8DfOE8fmhG&(6H2?k=l&$IYkZ-(?&-J{5$wM4)>*yi$#UbDV7b@)1JT2bj(whn##F z!ks6JG`^xIK9NWzlAaobVUsas{y|}NYlp-37L6%)(McaJYeG>fpNM+3N_xTbt|#F0 zeBz2WxG#?6=2JZlnw3I097lL!kpQnlr#`TzdhlIxgxTr*#gz7-wZo>8L!qF-Xh14+ ztM2j{lZ^mFcXm*8(#senhT<@-$L(Ka5%tS9LpLsg=!F=lI3&Vd>8hKyImzQg-4etr zg3R={L5YxwD^RNL&(>8Pv&&E4;DR#-dZ|lYnx5tMKQ@f@_&_2RP#On=_@OQgfptgR zwf(yFQumU!~Z_k?8KD%^xo@j}#i->;!sZB_mY*%St=Q zQJ>lG>X;gfVv3haekidf*8MhHWqV|)ZuvISzVWZdugFWG<3pk~&)V0Rdn^Cx^%`Tk zgsIzj|1mJ~%**ibA*}JSOWVldz`@nfGAgtIUJTH0XsF^;hwlH#-;+q9-sR>>!4=S>(0Ue;>*#n1uJN zW7#13{cI!bU^xSm(J=^?9^T^M*Q^355F&+~m!6gxS5_IqPwMypFp>|3?o3j_517&g za`tp%R?E$@+26?}3Z5XT&Kf6a+~!?wlR_VqOMl*fx2m`hYk>;oV7!0(;0n{hX<>%F zUi2&;*}N>LuntEJY1Djeg5fSB&i7Wz)XM-gF5+_8eK*+0wB^Ho3q3LKF@qR`bW7l9 zc`e%XqdWvM6$|1yoLF{!D3%ODZxuVvz5};i9@Vb|L@2p7lgZur4>)$n-V^2i>)sPR1#R$l^D)^>;F^{C;BUp)O+S=_?vwLD7D;HNn$R68^VYksz1+w;aA5MiYmOupBL``J1@2g4NB2rKoReJ z9{#idYT`$YaEeIv4N@om$Py_XKl+~gk*U3^Ed-PY!wK)-{1-T2-w|GXHEx(--N~IH zVmZ;N{Gs~PzbJ7$M~BZY*dR9FV25k%a8;!o@{&=)oxX0e=b{XeJzH?Itj&mUoxz_FKUGo)l}R=*X2kJn zQW|7$XA>&!YKUEL?hqhjiKN2Y24Y8`#CsU^c zSppMP!1OvfNEk8K3Vxj~Xzqux*8N13zw@*{_)~9-?!VRydGclFW`1#cW$IDQ`|C9Y z|pDg(ce(7xIi)mE)I?xJkxujbn#eoSGR?%tbf z1dodEv~LWmHT4y)F?DjNHXh55i`)Dwb{#_x(OS7d~$gy5N=VwU>eeb%FL@DKJRa~!zn zR6c=56))B_9!Lu%wd}lcH>Qk>6P5Yp?ZB+-cw8z)H?ij?S~l*G7Y@x~Pv8hC9Lg z8Ni_vCr$&qzx(FNXZ{i_sFy8b(m2Af34yC|{gSBq!SH%cJ;PDZ&l`W|95pwG>)EKAQs5GUj3u-RJy-?_N{danPZs7#gJrk_?`fDq|wEKC~u~wwQ(nEhy3PCEw zAd3>yRx_!)w^i4r7R@9{$G5Wzvn+)36KihF=Tk@TmlHJ@)|M4q)Zh6_a6n$E5 zO12#HrF-qTTS#o6d2kiBO?kW&eK8|PPRVOcF zr|kwJJ4;4>@EKMt1+V7(ILbdY>mZd*V(EdNpKw9_k*C_;SGRKif%BX=D9&t;f$VaS z(^}!%hKc;S&j@m?w~_jff9FqJd3*Ep+Uha1&%LaY$)?B!Hp@&w8*_7yUwg;UC%5iB z!AuejUEmr7iClC9z7jvR@Ol#e)#G^!^g=CcxPw9hfuUjaXt`WK)vpFMr>qh+y?N?e zUuJxZih;y9w+95@tF1i!ys`!dHcIg;xs=xW{tm*u?!eo(d*ZtZZQI*dCTg3KR;gEZ z$IPxIovvERmoL}}PdgKcPJ{bd0zplSa`+5ip9+|}q9`jYDeOOWrIBF3h@;EzUc7@I zQUpQ~rpD0QN+(v6IVqe?-`8TT*QhEv-tkTr93>d_0JiaqNP~U+^CNbm`{0y)un_l~ z-Yge3j@Hnv&S6QbhiiK^yv8qU)p2=roPz)js`MUiSrbx%zQb0t!TtE&VTRBwhX*Ny z=p5&ZB~34jv)HyIzWW_#+*=X-G056t-+7Q+Z(N&y`TcU4%B;DZO-m};tawsNHo)7p z`CYTYCT%mRQ3Xz{*`@O0UpBds@8|PWOmeVHmf;o$to_ptOkjD2UKX;L9O$huz~Jsdo-9?!j)*O+KgJF#OJim3Kj`ftSbQZTG-_oslDFB5I$o z`*Z#|=D~qK-tE*IL%bes5{018S`8oxtshLjLn}4YNNJrnRs*>oP2p$QSoSCGln~D6 zUauj<{qQr4A>pK^uB(M-LF9clPUtD}P?nGmBiH@N`C1 z(%?t0DhX87=aH8o>TA>Vuug71N1NTdP00ebR1u3c=`Tdv&>x<{MNu{yXbZ^zT=CC; z5RZkWdvFwfPov1EqrVpeA<4S}=*i7w^vvE~-eHbAz*^(0cRcKWqa1Nuwd{HV*;A)A z$213mmOTpc-n?|*Ia~6btPV`?@|jnoe>DN0tk9h|GosP)zg|P0<*R+v<}7JAWHC#%|*Um5D9w5s3N? 
zjfRyUgVft|)u23fU0pkMrav{Vbaex5dQX+gf)#@_K+I+kgY61t_U+;d#81gvLae{+ z=3J<2;NCvhvrz!0ghzg&)|nT5-vmKK5%@Ug<-b0zTF1qcBg%xgm+hJ~DQ&Gy(puSrDDhP+5N7y12uU5b6?HAaI9u&D+&hTYP(G>Z$q3B-j?Tn` z08!_82OxQ!A!z^U*h(&?bqZkeupq=W7_F&(Gr{w#R^(VVmVc>40Qx|jmCHMG^#DN+ ze-9}ifJIv*jB-AUogH|BmNLyVh^_GSZkaGWg5|o!40_%Zk}A6#v8Y zP#2BfyXiG?g|2_$quD%rIK=iPyJ*w=;TCRCh!F36{|MQ)2QKPiZy&?}u};2V{A^&? zqS9fzgs|>=)BLZzp2u=iy}y1BlKVxE1YVWr{KG6&sz7eO@ZsDlj3>V-eWZtHRPn;S z`B9qo z`X>zN_k~YbKlwA7GtgkHr%HK!$5R_!3C)ZI+!BRtFm`D- zHwW2M@NdbgSx6@Q(>TCM34*nFA=Pg{m3UoPuC>xFVfxSg@ivIWdmbEM4oUS8WyBD&@6d;>7n% zBcpEma#Mo!(~&ViFVh_1N+PQFpd36_+*D`3C;gZFx6O6Rd>HO?xECijcEGc}ullhf z#{U7H*SFMJ?Xg`VIk^*uFu_+`j)~x2-^Xm4SYOeDs^dnxe-vcxlL5o?u6Y^#Zr&;N`X1Ps`!L(F^Y=8c z>2#~fF5@WXfM9kdV4M`%vYBLyLyt|qwEX@qN9wx=A_DHW)yBL~yR9ak!jb6(N5u3w z3wiX<8v>VYkCHKjPsn>0AxFQU(%Tu$TyOs@>8^hv9*DVPMlPabjqgAtt$=4;|Ij_P znlj?qr<#@oehif-uOOnA>MF;Fe&%moO)IUY%gAMHc*i2`Ux96JHrYo9KQkDt_?;b} zm`R0{P+m92MZLX`{qlI_JUH+yK|}Z@&&ZV2MDz1Of(WN6s$$+FUX!{S3NxpUXU6Mb zXa9F0)-R{4Rk%7fME5+zkt0>w&Nd56nmybo|0j3Dd<;sgDBurS&HGQ=-(+G7UOJ5@ ziFvuL>aq$ZN77VBQ4MbXz_zyC{O*GLj87Xb2b|*5G20J9F0L;!n?KGgj=qIex(R`I zE{-)>nphJgjcTieAalg(-@QBYB4F-!(iV=VM0}SWj>bZ6cLm-DtR^S#@(1o0-ugD9 zuYAYDn?Fc-JHb%wJN0tzi0=2ET%)=0zTMFkXFvk77`U*cM6Q`thb}T~Wb+)SjX#|E zt%$-MHiMe>v3(R-=5`mu=Z*bP3_rBfpQE|5>JVMocT!yq{C)W7gor%_P|vMT7}EUw z`ej#&3O;w^=3M%s%F7on2$BJuf4m-NRcIKm|}H>f^*ANo{(WceZ}k zxL$+NR1@Lh8PCy4|6%!@V<}TR-V`Bg3a+wW1fP7}{ltR4fUX9Y3R#DjY3gJN`*xTm z$PNt*XOKpGM@EI}(ndv5jLNP5?Sgw_DRn=cMvo?dq@A>Ik}>J{bXW?3U|Z0}oo0)m zO*3xPjx^q&*YC-BURfqa4_Pt?H34;ja8Hc4I9LYnCkgiijdtn&1dGJs=Q}5L7a5wt zUI|b807WdGCR6;?IySOgNf`+aWVv}LFCM+g!eIVjSJCRGn4xa;Z=%0uR2n(LM~+jErT z=mXZ@rz59^l}69_`T@&M{Vp+jkAwZbgt?EuoU&JCtT#-9w=!6F%|=rUqW4ivRCO#xgz_&?fXYw zOZjzmNFPgKBPq%44k_tV=z(3^Vd&AQ95tJM0T@mEiT^v;G0%D7<@wQwW?o~da8Piq z^CA?#pUJG*w{4f4Tn@8V4OqA5wgwzQyIlx#{#JMf-;G##Jo6IATkGX^&1Ei8iY#qU zp&PCRJjFE&gXY&qV^mspAE;&GOmd*LY#EUzJA?rl3|~MBs;(EJ5@SaXX@_k3s=o1J z=dO&SzD=%rteDw#m-Nu{ZIeU13f88}o%%tCCKB_PiY6<#zKDv}tt+71&EW$j#xUAZ z;FjOdhRf%N(o|oYzf15Fb}x_ZlnC0Lkb3wS(OS z9ADRp;eLvC_GG%mtUbi;f8Oup?tosu>vgraIM&;Oo*t~VUli}h%t$>uWq9RCbIA{Y z5}q{q0|8Yk>YlI75Bi`Dzr)#Y$7Z(LB0(eqpZB|A)_k+0`Q|j|!dft9(9YcJei=>>oT& zc9|$BU|thBh>o`YNV>k=I_LEBx+_RJ|-tI&sBSH5ns8NGH3v^72s(J5?cGN zdt}KK-$0}kG9RC*8C2VJXszM+D`1OZha#L2lKK>rrFHQGp)%Zg0&ERhxf41)FB;^O z2-7C&JS=>p$D-o(?thq~4pJhQ>QS!s4-1*3?{QtwuobvXs-y}R0EU7m)S?7U;{cW_ zCGS|ek{!&ayZDaq*|SSY>qoU}03GL%x+MF`&7_F`-Om4@!@`(xyk@Dk zEe(@7=TkXZEpsep+vgZ_V1bTZ4uKZa)j2$A1af42<@`N2SwqF>re+`M$oj2=fX}BJ z|0T&)o(h0gZxok+(fN0$EX#FtqBYhLmt##3+zR1ojYj&voO9S?Q_TFrOO~cXpNOND zEc?;D6@#mRAP6l31pw}hJp^lnzwbW>vh&@^vZKEOZ(Za_+> zFda9#(jGr&5(zL?@O9WT@T1{ma(v56V!H#Sb{8L;rr72XUzpj5`1k(p;*wfWE%8z0 z>dZE+2*STCYV~vUUt?l$lu;ja2lP&Z`sMTDJf_!J_FAgTq7dw8rUTwuUcf0>m*CQ3 z`&K6jssHUs^bTr(H;4pSS#DZ6ceQ9lESNw%x(j@DX_p*N1n<0@?R`n)k+M#lcS;v}eDo;S54I52p7?_OKv=qj?^(Y391BPW&g zFfxI({bp7-o&}B!Uv1i&P0-9l&TY}JS^uFpB$YzkGr1iazv*P{*`d!9S=ms~5E|jW@HMr#UxE>A_-^znwo$vvA^K@3tw0 z7@pz=^{+K^pwZkcIS9>To#p%00z-3#SO!2#jGs1GEhtQl2TQnP(L(}>wcxi+Bgar{ zk;(F6AHRc4$qX-XKp+1SPF*ISvu|FmJDPIOv7=6Vq^>pF>^jQlA`NF&#|L_UJZZ=N z)3LVcM_l}CMwtZh*q_9PsaqHgI^!5g82igElm@k_9>nlFNqx{-1(^fW+ZC;nf!8gw z=8)j-y}Wx5ZeDY|yMg+2k!SEUd7tKosbMIbXIp*OK$;C({_WpKoNDIXP5?tI+$2>` z+!nj0+**4A)F@2DudP_IaGU#RB5c*&_gU2=v|L2d$bbcAF3LjA;orGG@vZ=U+dx2Q z1PP6hQoK+J`d{aDNKV+R>c16duKn^kJz^!vtLSM36v)}XKEjBJ)||Vr&q;RdIbr!zvPM0f zD01NUE@X4J7=~+P0bZmjyllhtN`vJNt7vI&s!f;d?yU!z=$bPyBw){3HDx_6Lw)5g zn{iFYpA&*PFe?3AECD+ztF5sW`OaAMly)|W1_n=(;Dh)p#6zBZ2*{KBeXp4ltbP37 zx3#|95q#!chqEZEqH1vT@c_;kdHg6yEotI&nidlq$0B&w0T+uQ*;+*K*a#(gbbeY4qjCE1k4+6 
z&4!*8|7M%Joyg6}xBlF##VL0>$j|&{+mR2Dc-|wXpo7#E-!{zn8h4v@VSCB*l2W`* z;wQ1uNgHU#;x(Qs@gxz_)zGldu(|#qqwQtDeJ+|fO4#2@(4WG|?o!EFn;+%sG-TjM z5!_6yL0MiO?DjNA5HUaZC8RXh2kW>RS{O~qf6{$4@5-U)HJyH@{gFVQ0Ky~zo#SMJ z8$gzumVP;RCcOWo@xQ!Ht98vo7Fl!1$7ky5Kni@V`0-d%3ivLkr*cQa_pptcOw6)u zpp*CO!BJbglc$X&znurjQJ!K=Y12zCH3wFfV^WY8> zESlH%>(zgnMxgxg%2vd1iOgT8raFU%5$Qb1B@zW z)vWShdUoY#s!?$^p80Uo5S!js^BrGddGT-7lR+^it{R!GZXH$WVtcc`Q;G7+OVpU}S0DYQu=6PVNjH+#A#PbiDZsE5Au^1*S|1_~|mfvYa^Xt8RP&?Nmr( z$4y<^4gr*RRW@g~1cMW_w~F6}3%NO>q`4&p!>5c`ok|S$Oa@&o(JK1b%+=c$n2~E- zJdij%gF2fib^T*h+XSs}(Fb_Jt}VA@vyU1#h7){y{Y`4NkH~y&1mg6_9*NU$~4v(pqqvkIEkk7uv8Q3Y|+04O#UM4ZP*+KXvS_AY@ zlo+Rp5dqf^yXHAgXrXPHTHw2@)h-fNXO@8&?LtT_ru0m3t1%VO+cadP1+s58g~RII zsSiOnd-t)JTqJd$!ONag1|-FUpv~F++=|g|=YLF6xNw;ekoF7{ycc?aKTt`4CFz|NxVebv}I<(ZF%v92?^^SG@ir?A~xfmLOOkm3k-ZS%a1Zg3&@fjcB zx`6#owuN~gNa7I;2BUJz_&;xW&HF~syy2^BJ5qP)hPkmX4&K5nT;6)Rd+2qbReL&$ zy#V77+0E{a<0i37e$^C9ZK9X~dVD^0r)ii*I_~xoW8u`rbu5I#?xzu+!Vk57sbX#A z#INw&pg-Us#4AwNN!_;3=A~p%`c}lD3K&>W>d2A9&Y*mAgHe*GMKpOV!PDDaPg_V; zV12oi5_dX`?Vv<8c^IK*iVw{4q%TE&p0g)czMI7!@39L&FK-GpSB z?+gx1p}A6MzRnB_Qn+V5Bty=pow4^m>2eGo?~KwI-4R3_piEL`7Q`2C34j{cjcSWP zNM83t!)r~vFjB|>FDrv3hxY$)phJm|z%PKQK|$dWr2U2R;uqM?Vf_pu8ER9}M{Z9H zVdC@jD-yQ61i^z5wYdIWI4*Y8$Dcs4q8(3-9$|EvWJR_vd2w%qWK(Yj3e@(P&W{`Z zju=Fkx=wsx&u7zI_+0)H$$I&RHEaEF7h6o=_z_7=PW?64=8EKjta!l}KDFl=M;Vqb zFAb5$>Pn5%Ll!;t4UfQb!*~98V^T|^VE+jeyul=cbR*APyw&mI>*K}#e+zyo6nxmX zAJrB~EVR;adExs}E*+xAC3mv<*M^VSar1LgtHx(V^vTQ1mBIPgIz14A3p9!Q4bOf!UjDA-NN@(kC7xyCv{B%FNF#&~m z9tWY+M8#xYFuYARWmU=06MzH+63 z;9V)HsygS%0>|CszW73GzZ4si{YhIVihlD>Qizeuoyoay&yOygb5@&{YtkyhTGTL_ z)CZ8R?24gGP(z=&WOtIx)wz5;5zd|!`s}gE0T^Foi~Z;%`%$yC`q*oFL`xJ+hNa!4 z-SbFhpN|H{y1@*uA$ahT7inR%$AC#3?(0;PppQP}Y%~iI1L!7$J2X$8Siqlx#gU*5 zE^OAeAyIgt{0^?2+~`ZwJA7t^=F=p&GqMP`_xjD2BBMgR7BQBT`4PD=Q$Jq^YW?qZ zg31zi4r=%Nn9=@x8DGhLy;Rs;xO$&=lg~ya)N7_~1i`a?wHZNnP`YkyiU_g8``Pqy z`!lthKAXxksC49Xv7vY%^`u1^?$;y3AT;6qfTE08?L(NEw9moHZ{1X)mZ!Jyfwnw@ z-FG?79$fc5%2^@mcZ`!(U39pEjx`|Yr5 z4B`G71CPUqf9wcgg$opS)5HJ0W(9ZmP??G2gmIFJ)|@OSYxcqGus<3TVC3&Np*4?! zNFPV*9=VQuI^}i>Pk0eaOtfmsuH&B)c;5vZo4_|8X^|t`UD{#M47_6dO8Y`BDELn! zuyF{(r(Rb>{&zY?J^Ut$nB;&8!GA%Lp)_Q%xOyYdvnz^w*Wl&KM=QUB33xh1Fz$CB z;+`L?OZ)RxX`>Xw$M=Yii=6Xf60*8|Z$Jryx}k&A-lq`xdfOOCQyqIh&Q!XgxIBwe& z$$^b0WiGkAoia>Vu{#g{Kbo#Hn$7p`+ggg+o7%N%Z%q_MsoJ}Q)>hPBk+jraZEdPi zsxfN)thQRQ_g1@Y5L;DZCft(8|Hbn*=RPOr+~+#?_5FTk{yX?d4iag$>GTZuVQYnf z=HHhKe2t#OMty)U=M3rOlHe?DHR59Iz^6OViSn;jOKA|ht&uuV_;h6nzYm^YEB z4p+qm2w&6n}xsKfZ;^NJDJpqzT>RtC?{;7(Y2ehw|lEwlf$P4aQ7Yj z2(spJ@E_}UFjs<}=3Am7{O(Vkh{kP9#1;Pf z?l1(#vsa@DaP0M9Pd)Lq!-!t865vS6_BSxC3{Fg$9O+M<$w9w$woB3>KcZ&CrdmY%JKYU0bLLXD^TSeyJyWsaE%B^uWRjCI9FOu&gx2xhrB`vR0tCrcfV=GcPu7*yG9|X4; zIaOr>G?+c%#Q)FOWzal%0}J`w)#rC8SipuDe@rqa$`02}??A2%y(|tezXA+7byrKm zh!2!(rfL70)$}qE7R?Af&4)LkmYX+Nfl1BiTdnFA!-LRz?eW`tcw|TCR`H6;@5cZ1 z>hnmUyS)>u&Gjhs(6!8w7fbVI-u!h~LBySb4o4|?3S>|ae_McMNg5-al;%a%`Mo9C ztti=v>lmtVZyfIt%HjR+=bChu&XrkM{U#Sn=?bzCQ26F+M<54BJr{OInBEYVYkIZc zLx@|86Kqa%Qz+1Zz3!O9&ywq80iR%p(zy1kzl2$VZp<3(Es6h7km)y! 
zC{3$hS7s5mbV)IrpR{oNtrPc0{OUG)&ca~bAD9dGXl3h_ne$y#=^_wx_ z>2YcEj>4(cdf=rqP9&a<(^<0r`8i4U(vGCVic7EA%}rFF3Nr1D?;E%0tn%au_Z zb*=iQ#r3Q5->H(jqilqk0ghZE*)4VzqQiHqgsbemDiL!jRWViziMb9~LypOWwc1W7F(goA^oKK4LB4o8%RJ6YK< zJQm2qXLicJeS9)8NGUZt3NEh@u_H*z4!Ran(!(A~NVG7V~w)QkvxW5CC09y98ZO%P9OEZbT0<8uRd8rGJs$)r?18WJGwH6{A9N zmax9Wq}vf|t79%7D^C#eL%VdSndA60R0ot$9P(*AhEQQ=7@*lqTItxPz+OeDgQicn z#{+4kWpvdeE8Mm&TMboVb_Islg0e;jgLqH3rx>8Sx$-iU-OLjE5i0|rhqR`tq2F|t z5?D&wL%iTd3q`ZJ<0V=yQ5V$RO*=R0n5rbNG-YfD$BqCuU0h#c6oYM)U+WQ{ z^NY1yPl%Q!)@aknZ-nZ_^2(!*5Q##zz4aFTwpiA~W$V&Gu!3)+V|c6|DH*bxhA)$B zSVc6toIEzTmo}qzHuwKo07vyafW4rU6Kls>V_({#RvmQ*8-5t_9s)RzqbkoQ@K?pq zdD zirMw!DNhi-MU{95n=3y2G|ct=!)y-VyBhSS3AF3&o3+0!ufgus*@Cm?DC--qu({C$ z`U^Vmz0qP*e8WHzs;*$g*pqz9Usz<5I_}|bI=6F7MnWJfm>QN%LNfT_eb&G!zWs1! zi1!%T*AFa7t9RMhPW?*|uMzK;Dpuebhwa_vv3J2XfFi7?poPe)N!!6Tw`A9V>IFx? zqVYSO*u;lSfkQIhQt{~S7rpKQNmV%esMB1f2+eC*gg8hekr0$ z5ulw4aMT_VzRM#8bo#jej=k&ZNGggBUWLzl90hg^A#7z)3#av+YQHy1>%<_AtxgnNu4a z-cj`Nf&BApc3CLK34`5Vl9Sy5Q=+-aZjXJfH)wN;z26GnRr9Y^=*yD(+>{2R25-MG zAC2Wn^Hc!_^ZRy4wMUesx{0zT%b%l=L4*7L#0T`|kL%!ZL5YC$CR)rS?_I>7sG(B} zLw=3$zv>Sd^+xO~R;!Tji6`}SvmcgP3A=V3OXaj?6l8cSwP0@-2h z94Q0xj=z)Z%Q1Y79-~-y_SCMi;lC}h6a4UP$|K>AN4ADM71DDvEL0ct-PKLA)o&zmqbjYkswgr7*mN)M7kb zqww<^(u(qJN$gDG4q~X<+|85LO`FCXP`MHN@?~IP*a%?}u5qYyrx|CTF@-R=2*!+=GBGUSIWYuICO4KY!+BzwQ2=XJ%39 zn&O+MxKgQ6eKO?!RSh;Jm|s`(C_YtKzFeWxZT+Tk5Zw^fT#F>jeYMKIHZmXG!*2Sp zwzWG7O~$9AY6qo~&FKO!x06+BcL^yL+%p`DO8p}SwwCYTCQ{t+#F#6uA103sD6IB* zAInQx8ZGsL)fppWJ{XbFP{q87jVBAIHeI_B_?CwK&NS8SKi@x&e1Hf^;R=nOb;%Dx z;suql)E(I9XXw`fu(AH7Yh{eOhuu|3$lwtPF-SZBYvPSV$qI%2TcZW<2*<;E*0xy}qOsI2)fLchbj%C-` ze+ba?5~)$!=sL3{RlsMf-7}njn6a#}q7TB8056ya(%>EoSlR)qIW27s;dpvdX_7ba zNGwhBxSoLI__=Sn8s{2s0Qa3XO+kc1{jszne;;;*CPUBrnLKc>5);yDp8b}QMNWi6 z13x@@%u<52RJ(^khbG`;WA*PK5EKqY+Ruh;;}2Yrpz-#Ke|{VFB6=@nf9}_>uv^`; zXj~BR(=HN1qVZGF*OzX~m3An@fU^!#Y;M7P(?Q2Yz;|4~@S)v{m~RJP_D+=^fmttZ zh4eBT%5-e63ACBTh{EN*th7BH$gCJjzR#htTUp*|;n_~+;y`z{20`4V(6^BJ&GpN1 z-68>_g17$p_$8mG;=)OHz3+0xtnTAVaoDsSD~XR(vsJ&wc78aQ^Q4|rrf1ZWN)vxmt^=e z^WnCFT>HxTK0zK(gK;|+Xvw6xRG^=Y70?0;Ml@J z_XZys-E(X?>k2eD9Y$?QyL(G`b`28x;fN8sWB%d$dNl3wpT7^)VC6Nv%h(e=KOzGR zXBH$L=0}WMQpForOnAEtM>^FqKvsX6Zvdk)cS+MhbL?(j!rmNY21PaMYQLp{*jilS zYSm(07NxWYrPH}N|6C{jTElPwvwFhiFq=z;g!=8EZ_<37$d^GQQwBE1EGgtUa9-HX z=GAxle7p^1s^~oX#j3e-E0LG@{J1U4qUm29Tzy6Gi}F+bpm&+myFkbU z;iL-L?J+p{!I1w1@mA`5nX`6%y~#Cz4O*2LP~kRsZ>Ic9pJUfn!=_`}Wv|%}*eCN? 
zZP)!!3sEDvT0r(vl=u|Rtv+aVqz1ftBp9nHYKRO zDpfK86_kXQW!#V=N_s0lsY!>4!TS!@A?JP*XrtMDl2w{9gkr=kGB($qRDsxOji9K-T<;CX>-H13nk{Pz5*Jf)TP~{ZSYCFH zjdwkE#IQe}zq>cJV3_*{@Gdt(>U zEB=M!sI-TVId%svGmp~di4AU;M)rb-9*W!Ft&{~UE%AHpuWb;64Bj?uRb}v<`&&tn zBzSz@A1wl{VBCvE<9_yzdkaJ0Pv%LzWwuv5DTk!V3Ij#-^3T4MCVDj8p1}^tQ2S+(2-{YDX1$+GO$2YfO|Jg91S-NwB!oLvYDwljt6*6U zql7Fb7$3WdFrC<45C+U-sI{WpXCZFmvu6)m{EY+}dM8(9HCm?`E5ZcD;^{u<)+}Savxu{wE;+K{b(jQ{RneS!sF1O%$8a!Kfh)9}1iAR00UbKEf zPOmI1626AyK}(IKlhy;hfC7x~&7fzDsc)VFJ`G2Ki!%7m+i?RXfVGbpd(>^;7$^;R z8_k${rZL-s^}ko|www>Y!21Au|6Fawrz&!+ioC-4 z`1IXJDjuq!ex6s_&z4RlT}r{@>+6~kB>Z*V5|j&DsizZh?&=gQaB_$(wB-2+;LA|0 zlMWK_&V9raNDZ9|{Je+>E`5;c>9OSj?{6VZM|*}LQh)motEjgKr}aKw81c2vcbql7KT47%#^`DNp{ z0fUCZKw|fd=RV#88h<|!T6+9)!qKOm%&;@rqVBUg))VSYXzN1y%sSD+)qkGv$+cG5 zU@s2m-)-y3UeHuoA;O8a9G|-nMj%#r$lio)2`X|5|2Hi#aJt#z@M3S91+|Erqp025V}K;R8Tff%<(etvzR<7AYeSXlwNqE?#oT!zM@4|m z7Q+~%gJz+Zh9}NO`|t?@lx$=?GV9(`#wcR)^ubrT!A&2lBFo10{F^RAO}=rXkzRE2 zBIVkBx1vg%dH$qXZ9bYv%NYMPuaDtDtR2&X`yLE+B+Tv{k zdywyAqcZi8D=`sdc2?Sjfjd^VSvj3IxxLqBz1}PiXvW-1IC@flCv0Kn7MsacysRF2 z1zU!AbO&#JR6=)*`&{3>bu;j=Gif@GDVeTUT-Dxu=F0xv)+5AI`2>Gtd6X}(k<1Cz zNH7?KEVa&*4Wm&&pd40BE_jQVk41WbmwCEnXRewZ}3?YMDoz)_E!rY3U5_@PB z5cGVm`Po-NV(H0U9I_ao@^#p zFsp=|PBP}uPqQUD(vxA57>bSBSDv=u?}l7fi$2jLiutA>IAEN#tYBe%7yqzQsqq^k z<_v}#Aw9C1VQ6ui8}3lb$9!3~=QYVxr5?Fb24Z01b1$6|r4aSpE+mEJ9g@$BKyG;+Lz*Ee-b{*S8?Libxa< zx5BDxe@tU9g8bo2z5g&F9sD9156Yg`X6ikQJcC#adQ)A&7X@Ed;V!Jo2JV}XqpnwS z_@S&nc)s8ju^IAfpaW-#rm^d)ok*pEJm%u>Cg@c`%LFigp>g78AbUV8r5n`{Mlq#M z*oH)xK8^9i)jyNQpWY!I``oqCoLhWho2TJOZC`SGTy4N(`#C37#V0yb3t5)AbM=)f zTYB&w?O|KLA@_d}Wn$VmSm7;-gM7qFS(ov26!Pca&&t>A@PLu=?d+gy0MzS4}aXU9Cug889ZLt6Fl%H8LxgS`T&2 zkUM45gM{U!*kd1i^@p$dm0pMx)bBc-$LBnFY1A}- z2FTSG{|(g34SpI<_|9&)hqsVJ?P}u6bKkI1{u~fL<_5NVKpvI86Kp0T!TE_A@S*d< zZh1TREfYVI*pVCsm%c{Z=JHHDQt>+779@(BWl68a^YYnCkY&)CPLPo8(E!!&A*HZV zZzxgh=-}7a$d?npT>p!!sfDz?YfSf&K#!uJ`vJMApF_hU=W2xe?ft0f`ZYsd215*_ zA27^aua`e|7(=;K4qGs$K_*aD6$DcN3Bj-?&D8L32N9&%eh|=vd99{FIi{xY6?^@F zfrCWUS5GC3;dd$6L;h1%YIf3!pEy?Wr~81~@Kg=9z8@|%nEl3HI_h`>{RS0O75g-K zXOlx#%wi9nDHrg~-$^j|ooevgH^0m2#8Fo#?6M{UFabzma}%dee0Nz?F|4Ahw?`p| zUFk;hqMCE`+IzPI8U@Z%mqYw-A$Ept!pan7Jj#0A@QUY`Aj5KsAzH!82* zEmu>Ok$IAt>#>!;9<*{9QSJ)P^lY#8Ncx2}pDcA#$WvPA`v=00QhCzRT4kGhv1>AS z-D&P4|G|Lb+fYw(zxI?u>#^hq3jrO?TLpJNDh%`pwlz9ew_sv^MmZ05;Mq7QRiSRv z1AZ&ZEpQJgWz|5GwchETPV2f{R}I18Fzt=a4J;_zz)PAi&1{lVfgCLQ_%x+t5`Z z16U?|tg(`vMG4LUn{aG#97I!1I6FXFs?>}!LVr#we}nGheRj}!C~Pf2#{Q^gs~oI) z%nH85)vrkqYJ97`B*frO_}tI2#}7OzhV&o?GxL+k6-se<%w94n(w`hRH2Fj%`XO{J zr0rf~uPo84yxQX)H`=;%nxxWireT9aGj-f5DjL@@OU@0D2BLw{Cl5i|6b^~ij)KGX z5reie)z+`OBtW#b(&LQ8pHryDdDRnx_?3mpPiBYtmp6|kh4r<^nga&#wyR!HJYoWU z&1o+imz^v{*VDYs$~)}9C&q)D8-lQV_X|2F5dymjoFnSxw`iK~JcY{NhmtaXKL7Q8 z*=kOrh3z^le^3l|tw0lgUuaO$ZRG@_=vLQxM=|c8n63o>-i+;c}gD}GTd zU+P7Vmnts%`_LCgqUH174j&GE`Ek6&3ag2KA(lg2`V*^xXYvQ@=qfW_SYZ$yyp>;r z%KNZ}hqR18OS}3tLC#kTYCuN#Z;156Pw95B}J{-vUG1sP;UO0th5S;U&N(>22 zgBQXjUt3|&lfz$;=gS<8lF!lvp;mK&{poiJSQh(bJmtL;B-o!$!GGpb+fQ!|xsqmq zmA-Seys!#B*Dv9H2xAi|y3U{lF?UFj@QcnOMJ(Y{41ATcF%Z{-Bb2Y8` zxt@S|{`>t2PLI$P7_^@q5Abp1Ki?ZgjmWkE|Jo0jYue`^5qBR9%J0>TqIj=T6aNYR za`tI&&v%TL94gH)w!@)L=9V9pC?~vSPRsP(l1oPZ3LFLe>fg{#j3}_(P#Qp_nvHLC zXL?(cd@tiMSPOPUzw?s{cGAf?LR_;!5SpYx!^;%0l8-}DA~0VwuT!#>YVwN{8%w+A zYlm;g)OVYku2Pkom-`a;@v1mxXj20vS|&OcAI&r$xnBhW`_FrUfN~vc^I#%N@f$zT zx#8!A?#Z^lguo!WfSwq5#YxC3RrgRccA;88WpwEnsVMMhRulHz>-SH2<;;(MufH=B z_MDZvq&q{!geGf_INIqCs+~4-MB{2jR~J5}JS&(9u>Y66CA0D|s!Qzmje&KVK3iiN z-+~>tn7a6CmaNbb+JSVVp>{-V9-_zl zJgd|y86=lq%Q-4GztQs+#-OW0Mr9$Ji(LvR%i=NTisd1$aK_5y+ZUx+?@%h-iHteq 
za{7wr%NoT2qYJt0v)Uzl6?R&05z5~nb0aKO{koXH0`99nnGd<1)~V8i z;5iqp#*Iw(FTUH#f?r-gjA9FK>DMxx&3VZGvniM{V+%rx8+vEstDSz?06D!<~sfVeJjbvD(9L z$$~hKE-PO6zNHH}-pU~6a@=BevSwE{Io=jAyjg|{3@aS>eW;)!AD91p(*K!Xp1JA^c&4ze z#!?LMSEb`RnXVfv4aY|?I|Hcws=@Lo&mnyAcLd3GC)U|E>03p2n`y*+BCmh*fWT)) zA2ruPk30qp-{g~q=vLU4mV{~HLmS&9cfAxd{8Q1we8v{D5Ek*3WqIwt6pNYzUPe-n z`_Rc}`T4sz2Z2*wVtQalZV{Obu?5LU5)NY}Ng2iuImHG z&HRJ>>>RpI{nT+R)E%^Wopfxe`!Fkbe`;@hE%;;amXk1i4=;zyp7cFz<3Sq(HxXh) z6F^kP`>ON80VDE{a}_=d(93F%Otfeu^MvES(JkGJSA<2I(anWK=q>}fH-6qOAG`J+ za||~F*=nvL@hFcyj1J^{pI*Pgrz`)(Sgq$$9`lu!#_}EW3uEriMa~KVa7M&oerxFV z=+l8d8pgt4ZgiX+yf!CaU-#461Y^05jVaW1v>pVgs1PWOv|#O^51m2&H%-QG{5OOy zA_3UtlbrS%bL za`U;eqmsPU&uBW?so7Msk`a-6({=FVT;{JL@IGUm^)&sT`7F%t|MHr23gP809#L@` zu#iO~c1l^Rl>PL)RCt%%`E4@VTi)Q%529T6OHjLjR`9y97a}Bc=<4N)E^VUu4_~M{ z+*~e}sKJq5$AiNLyJ$I2_i+z~cr{4pM1`k~O`_G6*_Y*WzcjefQpDGGE5ERhGPbks zhwPnrT_-QHs7hyIg%tH(z)kLqAU~FO130C#JX~$}SqIn~KlR=h+OzAb#~}P`la+}@ zEPPPWp4jgt^+uVyX!Y;Dm{{V-vquc`6{?C{Jj9`ySB7I~BazPki1TW`m<`h-jn>zY zcaX1JtEu24VsHMZ@e(DkArDSDVE;DGr}k$0_;vTA8mSv`#_m7LVu^gW-Fb;@C!-(~dDah!vKH$UijV_N42RPGD}rN3 ztglEYQVWi~F&pKx5}69(`$*D+{X}M($yT4#v)z85R`gzI@6Q^O#k+p_MGu9Z2SBld zv37sd2J5@KKTM$N?K~WKiUd3U12WoO1di{AbkHl5zkqp zJ>8FXK$#-KFZW-_|I;FlO2BuP?&!}4s~d39m1oV31wyk!91~Dx%GKDz=^w*`x#L=A z+P923t+%-l?cdta#NWVd?PoE151I1_`0h$P=%kABzBGxhLvsl+S_bb!vsy@rd#xUL zLWuWCiYBv}RP?};tQ*w<1y^?*m?(SR`#YETqO<%rYLnAXeJU<1wjbWTyf)7b=Es-d z7E=Xl>o7CN7Dt3WQn5Nbti9Zv9o(M;mnE^V;j9dShH`hf)=}WIVf+L6LWUyKexCMz z(IAAi{6f0!zDReiLslS-cEt%kg=+E0;M$tR*QZe1Mn4JR4=j}F-n!hgKMR;(yDvKt zGTpEIB$}UoX?Bi3(DBZ%TUo|^$g%YY7G*HV1Sgc9#K$MEfc zJ_(XaFR2e9w)k5t+VN;ftf75hW@KDnioE{#_@lvHk}WYKbSjU|I)ciW&%7Tm*=KI- z-fQ#Y(l0CWrK2qL(4@~JSHyBB!Ho3C>SvikuISpkIBwMkKqwZOhg)2QV5{^Bf8Yc! zZ%y^1^;BS)ym}0Z%;fqjkdl}9uas4;*GcD_`aKC`csIu3^+->^O~aHClP{)rm!2HCDm!DIK@;9=|NabgDaeZd7emF7CiyaNf*48RX8(O;w(E?R( zql4d?d}~=VBz|Tp#x+F%H6Cjy1Q3gTZ}<;|d6V=H=6>E>Xj1=WeYthfy@ua_VO{tN zb_?oxf3A#EB5VxVy^r8GIQTa42uRJl6v2G2d)o2*FJ;8tLo*0r{Zw$lz{Vwz7``&o zpakET9)@5H4FZjTV?o3>TPPv>1higCJR!vF0SWO^ImxE5siL@?bKwXKx5_YYb>@9) z$ml2E)(Z!)xgPKnKmlX1T+q@KzkQ=JDO|dgflYo_VXc%$m8VQ`e>5Ap#f<0I(Ddd} zr_5?~HNVIbR+;xwv$F^nDd=(&H_9=dMi!XnP*%`(x0H`r^t(fqA7n1xz3ktoge^;i ztEUPL#^IG7P<`$L|I%rSVD9e7fG13R;DICuPgQ>3Vs>n1&{g=}2J#2`CVCno#-HqG zqqQkU`!o5{8L1yO{m2(R?_-C7^3P(i-JLJTnIAoQlfYU;X0^6-gD;wbnRp{??!iP0 zh#C=<{;n4HBYwinId9~YvKXLBTEI| zq2Jxg7QkPcA5n(s6Spq@nNv(4ORdBd0%Gpq^DTa?)X=LNy|zsWq`#d6zxqS1lLtgP zGY_#gj6SW__1CZI<5e$Bh@TvChc`mLih)8@lK-SLkn0KV2Q@3$uf{yia8Au?kDpfA2UvcOaumAcAa#EzYak_!&{ph)eAt8gSU*MZvFm}AM6b2SHte% zhZLNmntlBH$}HYFXMQxPwkcYk=E$K!s~-{B)A|njz&~nqrar790CvOX-WXa~XV=HC z_}zHp1e9yaeyAjCwgua0s5;9;{##t*wE|VrxABVW?D3Gpwr)9`*~vIi(OM!3-yZUN+ zvqm#g^Hx2NL*gg+@=?7@Z<|NvG%5%j&I{&BrYZDQ$b)m>RF%mMgb@0-G4Tq;FCOG@ z4YkrNs46R>j7wZ_fi{@Is5GhTIw+f$OX{7E{rUx0-YS<_+M%#WtLnD+jZ*Nf>2*7- z(2C!t2wurX12m9#&jWge%7v%Jc~IE;LrxPP zXLTL@BcQR`0M2_~up*s!LtS?kBk_0LX}!rLB&m2$xs=h*<+#2dVhc7Ybi5K;1{*>jOwFnw-(BrVZC7ecAc63plBty6Hqm z>Nf3?{-gryL-Bw${;WI=uR!4-IdaC#UJ0nc2~j!e*jts1|Ivf&ZBh)pSd4S=Vgl2V zt3H^#Pm(rsOyQ2T=-|(wrRLVM%gJ$klF8r<^{of@RqDCfpiNt(dBCZ1@P({?@92Lp zSO$)kmw#790fzu&_XHg~qW`u%>i?zJ(SUA9NVIHOsY$~-fN8-Wc6n+lbC$b%^e=Cw z-^3QAih-5~%yfk?lmhpBYLWwq*7z{M8n3B<6Y=x2BgCwonV}+&4|e)*e!%TJ&%0=c zXVfhV;v^^(A zyX1e7BKL*zfSD63h1G6|5A!A6P|Cd7;$w6FqmaA4XQxLySrIQ)0h4m;mp`)j?#0Qc zu{gIsn(7Avg?wydVcp83b?1?H+Xg z+=e(#OZ1V0v${4h$H-&gS#Ha)9odK3!Z)ep`fkwjrgqWALl z1arb8wzC0rh5R0Om=Vl^S+Mc@@crU%V#yk2VX=Dmlcb;W3VQWRwf7_2f4YzO*i6nR zKpZtzPm%)7uD^4^H~M$reJaQeyzDQl@NiYq!<3{241w~7U72Hi%O5pbWgIq?jzd~Iv%XrMk6<95)m8Ek 
zFa#M#1f2T3aB>rK=fx%PjEREIl1qI_PH**zM}O8V?-Vo~ZjLUXaxC+YnEUtN#dY1r6qigyT+U@O^-^GG zAYlAq5g6uHNS3{ccvs6oH&q?T=0+TgZJ+e&2@K3W2h-40v~lr+~*~E zl|(KI3dWq494uaW0n|z$&<+^jf?FIx!QEi|Bs9`l`#eIAljksH0)Cw21tV-{qnZzd z+9R0wBIh&C5EmmR$*&2k=+Kjrr|^iFx!3OMdK8M*m>#JbIn=yFM2Tip|8+w|-ZWSO z#qkbB-I^{#(-uD-H*fa?gZ}8@yO zB>c84$$zGz|7V)8=j3wZm+{(-B#EEYZ3_Hi4{nc}dbn7H=N+b`)oY~J6NrJ!)2ZP& z6qzb#m!8jNNX2)teg-knPK@TEFsSx6QwQy<8z2=H`%S~=guFQ4<>*$dg>2&Q&$x}3 zBlIw-R(u=Q-?im4GP|aO^hEr*zTU`1TnRqUq`iCVPkG#giKx5T6GwL9Y}((-)emtK z_s*j9;c5$Qs$yU7q?$F+e-DOZeHHzO_cKouM&GJS= zn7;_e>3g5MUPS^#4`U?-zcv;x zw$TW4IKLgJNDbEcS;`OJabUpLl>w-1#8^2rGF5&m?m zb+F0xgU2Hue)-1R0)P1seGkC*NyZ2kQ)N2XDtg84-Vh8+X&zK+_h)9BhprpZ>gjNo zxNH;Nx4!GTjOSy0XqSNfz#etjL5Vfj(NZ_efXCLPU&*fuN;64~8jDr~;NXg?F*>-2 zh{8lA4RbxAxet{atCY3Ua248_RUcz~P~a7@`yTJ6b|=`Ko%T6ObjYW4sJ_gsha{Vg z`fx6fv8HOZnp>6jgI3VzT4Kv%+QEwzGY|*NSi1_g{dzjLV)EJRO6A{kE_arONL}spKV)Exy_87S6TKU$>ng~D|`b_r(gNVfk}!43p}OoD_TUVLDZVk z7z^nM1zYSeo~B8$V^Ivw4pSjo344+^z`rFcTyDxGkBT(=9CFf~04&fjSJ=n=&euAZ z>-o8wM{N`y73QmEMe-iqX{<)#J9&Wr405IXI?r($S_>`$nL-%s6X>N#GOy13Mp5?> zzJR+#=5sgqkb~`!X{5RX6Mjau@-43RJF@JI`Qf})Cw(&Iu3MsWDW$U-lNtEcxfL6v+k+_A{A>c%)6rc!TSI3CmZ~?Z^Y|HK zRrtuEuZY;B5=Ed4Yt(B?xon|f5ouk1a^nMoWCfTMbbYwono(bW74o2-lZ zcjh90N9$r&Bc$OQ0w~cu$%R0-ZludL<`QZ|;Aa5-!tSv@p?KgI?93mtM7X;70R}}u z_6~akcQ9ZAYdM;CN_|hTuy{t}#Xs}4gTnf0Q~^r+m&`;TlMx$Hw>L`@q^{DzARI9* z^G*6o*fY%efT2Xk245EL+4rl{1+=_Ett1UN-GLM8xV2xGo^1q8+2{2H|^shkL-@J^4Y4>9f zg$rh;-7e<1-o}^6$c(Fhn!o#viCdOqfAk9mAmD7GF*Q?PN}l|MBG zYAo1YZdbXAp@uE6i-ru!KRss6?eo60z?rY0JQ$a&!-0HupH9rQl0fsB%qirZdeonH zU!`T*bG+F}bO=q`8I<-+{{(==uCtB!5m!tMKMDA+@=DI)$;|1|H_6SOpS-z#{|C0` zb=|jUmv`!Kd`)CEDWJb?j_gR=AAY26<|<-nsd=xsAms$LFZ3&<>5oH?A34E>#8&5+ zU{lCVxe$hblM?tA;;P`8p}e1-j$pM58eo7`&1k1V)|4rmMt$Z7360TMyp>;UJaw-}aKy?K~e9sK&lo?co&cxT&)}ElrdqXZ*3W)^r+p zoYK6HLv%Hc*Rzm*t()Uv8#HC-VGh`mKg}3qJf>X>(-(O zYTnVas0VgSP)eQ?McQFJ&a&UI^85X}-Qe>!T>gD&)cJON=1~1DYg~QX@?~-Cvz;HX z+vg$5|6P231_Y+TheyLkgf?u^_|a7uR#r{h3BOo{BXtwzv&s2IwJ}*Ey2>)ZEJAi< zy*~SG(a-W!HQ7TBL&o4=HZ2`Wy|mhq6BtAO(Bm?bOgv5l0*MrQ+*3Y{;)MA@KYh5o zM)w-`;0hm5(emniG-z!w(Q9^lSOqsTjPAhmSZwAOk>by^PdlBN(D1EtCFF#?CqYaV z9q==6IZ!eAd9RmB$FH(1JkN|u5RfB1(as_C=VFSV!}-~l^H-#{EbV_??E-g?YY@L+ z`LY?ocRAZx5RB3)@QOg}^09)hye!TNulJ=(j=+$z{ZvMOnZ@N_ANLSsz$C^TuDXkx zAurgy8kZ!#o7O5bbj-V^N-NK{lI8!c#0ZXt$ErK#NuDlr8}3ENR! z5=BN0ffERit)%K8w%jWQ#a;Sn;MLZn<7s+y= zrcrD4CQDeKUCqAu7Gf;k?i~AMnFVZg^Gy_8M~sHc`@6qXiQm~b(ZWfqkH-(V1P2Sn zQRY-B8DS@OO`60sVzSpgC#cKw-Q1AP@!w$+`<%{CEPDPMhv1YBbN7^OU@z@;^#x8H zvAm?yJ~_fjr6iK2(x3)6kgDpgRKEK@4!pra2^5t&IaO&0fCXm@b~MYfXX zUo66F@-y+s8(a8>RwsW<)&+fMo6Vaa{cW#Q#bCs4LYuthOAZnv^gO6ER;eCAs4HB1 zSe}+-#tnoujHeugMhM5Rl+m>((waq0(wIFD_7EaCh^g7UBV66_+HMoQxQcBLoV!Ix-47aHELAq-S+TlNhSvZz3Az7 z`R0VhpT>>3WI#M8?EYEp3>{kM^+!PL*8&?A?>hOdgRR$PaVGtYBjV4=IEc9*@}Mg| zjutkgW2jU?p;K<0IQ^*@F|i5aI8$K1+#2t?P1f6{q_*e7?-D17&G1_;e7}tJ=yp){R!= zX*ihFj{@Z#c&wzN*j@W&703A#a?&2+1y7Ee;IJtbg+VFjv+$Cj*@cL}R$@Kr^0km& zQZr^AsQL&x1Dzgwp(5x!_glv*om-b4`ljD?KKj$LqZt#>M)*4a=k)i?9Z)cdNili3 ze9d$1b=zCjbFSqC3%OV_%nQtqq&_dB4VYk4>GnY%yL=asgIqBvH)LjhUE@7h7bCubn&%w^WEv-KtuneUU|r0cOv2k7uVkNi2#RWsC~ z88B%|AVl8ILdfsT+x;RVnf*QgCuS(KyO>t9|K5yvcD*e<1~4T3M@mBfxDPlzDelaL zPZ_Rt4ZNli)SGO_o6^^cmL9SVI||q?1%|CkNsBzrsVZ``E0`94-^_q9Q;@-7%xYg! 
zPvGt1hCa}NGblT)4%NnU?Zc82qk6+}{8vRHCjr52hjt-vQQ`A>sN<>!uGHX9wB)|j z2(RbldDTKsAbpxr(_}h**>K-Yj`4ZUWg z=5TDh$oEq*7$foYf!(j*2|K29tdQU@h1d!v#t}IYb-A6&6%_OQ+I<@mNhIDNU+;Dpq~Ft+*X>XZ(ABX3v5&<6@v$UB~4|) zHVpl;9^)d=7u^kRdfq3A>-9lNn%w9W$QrHev7%JIE)NvI(<&@l5D>95>kU8I%LM zo1JO{%{$OuE%o1Lhf28F!>>PEy8Roo(#JX zj2z>HKvxxUA108fUQT7`(Z_@LUw1h6WIKMAvG|OjS-`t=Y~LGgh=3S;b*$A?^Q7@1 z1D{~?ERNil#!0IESPG-g8+#Q|eOXVrJvbxe>me|Tl}Y55y)utz{eDC4^XA`iIu3G} z)+5k^W^QaK=V$Mv<&9lv()u#jO9vvhOr9XwyCDITG@HWIcelMRXc%QW3JlmHryfDW z-MM1^kEV0~XZru&c%mpF2{~6Pr$RZOwo0WE!W%g*$vNl49A+zWKCBXQTnIUn^Ks6S z!<^?l=QG>P*k-oRzCV2bg56%Xy|(A`@wl$*eqH0yUG!>8Z12Db>d#J)lBs#whdHqD z@-GV?#9PTb%}dFhKBO@TFMaA~hdcuwgpV~)!%AW0;XiP^3#9Fkc`Y7r#*8FwjN3vh zCbEX#kK^oQJy|QSvgM^e3NtIRwNJgID|C_h>(K$1MWOa--2hc6B+7vL>uh7!^(+w0 zl;ANW7X!=n?JM7RPRs}S(fp{5o9-w3Z>W}xg$@iP2;9Q6W@V8^pQsBp%v>xktvkRi zb!MZ!__raT4;05Z(b_b=K z!(S^X=&_d0GRq7iA$GqJ|EX7$v?91uy4>*QAZ1uK=_TaI2B@8@2kZEg0OP2NKAp0w zYQ1@{=x=@mp3bL6e|kCcBMYaT$(h@|m@DLdYlZFrRzFtFqj*T9NB3{hq$lUEFw59l zuqUrf7+&myV8*1%KR%tk>B6nT*kkME6MYSCHGXG1NBDWDmod5WVlezg;auWd2AJe* zg@9@5lQyQZ#eycI>+I{7fZ5CzFlwgS+x?u6tV*lcmk|PCZnKl+_cJ@qlB*B9|`J9cZYnS zIQYq0WcP=<*%4Itda|-RLXSdZc^&Ygxx~~-u_t@9eLXZ|B~*-TT3NtmK(~Hq?9nIr z8;Fe1A3CrgM0fx)j5WVgKx6-}c~)cz*w-iiJ;O$0k&Mned}zXH zYZiTuqc5gvo$KCw+0!F4l1y8tf!6U?IXbC3+Stijy>T_a3Er|;1cGh(Xf$Uc!a0M1 zkA}5aZGS$GnX>u?qm#|~=R7s)#iks_uf)pcthUpKHjYA28}yyu2n4ZAZ_>s+x*IQd zbnXT1bjx_@cm(uN;0Fga7rp!Z}5h-{i0=}lmiRc35$Ulpc(l@@9ukGkH(`S);-F;k%DyUS*4n*Uyw@B z_Hpm8GdL00oLi>~t)KL2@9ek`GL+HLG#D5@FpFzKQe&$O>OC)Allp#;YUe%!9Y0?n z>c^&HSGP6BkeaZ08(4abKp?tQC78}9WLUL|ZBn*8#x4*Oe9Bvh!Hzc6?o%L3iSZk} zPu!^wfgkb%9Ooh6EWfFzL|51E*72kMOdGnfA8+8Vx>R#^LeqTxM|;oWB%M?UU@UX- zV{ome!Qh1+e#L9lN4w!z@M-ep72&6qiBA4c3+!z?A+lOVE^xKV#SF`f_MUlH4=c7~ z=|x0*_Knx10{791q4E{_iHcc9un1_l0_%7*<=anmL_vs0Li<`P{QUDFEA>9mz%4)| zdxZ@l!Ww+A_RSZ7unJ8)Ej?(sARyl-r2A8hmrbBF>52cbu;7jNk+h?| z+)GVYFteM9Dv zRVat?20f{(perAGi&jiZRU%B-A^rb} z^;BGm)nVBaQ2($gK_iM|r$c6ajh8@P`!z_~NTAs1IIA*bb_NKf3^*bJI3HMkKu%L~ zCvO7eGzc>zz4W7>okuo~xf8I%q(lZ%?CGaw+3A*vqg+nA%}r6yr7a|V&!a3u9-33O z@?hX2(+MQ#m`em_Q+7?6e6pI0dX=T>H_e-TzTp1I_Z7y}R3CD{XauN&pOj;U(Gpy< z!AnmgQb-0qL}wJ{6Z832N0i%;Ds35P<<&2^Q&!T@gK}Neki85%e|VOFX4qh*TAxyb z2@HH|zudP&Mnr(hmjHj2*A-W#>pemIotw2`v1UE|`TTT$(}L(98zpA@U1>I!7B^LW z>A*ZADIti^{ezgjKqGdf-$ZA1nOgm&oMgwdr8>C2Pl5Fd~h4!YA9UBp7+{-MqwU*sro^_NIP0x7S4sGs@-?^n0G$e2<# zpy%-fQEMSU=!QgOwQf#Ud+45n*EOy4VjCkspaA!@q|e_k7v_Hd)T{WJz$=CO5hh7x z^+qILD|(D}=H;>4l?oqyEkt7u+7CW7FYCV~s)+j~&ILVo*hvaXFRMBzE+0BwFi*+! 
z?emMo5|~hRp{S7v;28<40AeN_Fv#8VTKb7=+ZGi6l#d37WASo8%{`fd4DddhCROnrB`0d9q3Ela7nJ|tAW;KzoT4OM7c*;eOB9Z z(@!qOye|6IL3utBS;xkNTRcX!Qrm%my53q3j6FN@hu&$SAT0@|Builh)ruClCyBIT zxp($bISyQ}bDZ~HF1Xdu+w<&tlin)|pSnZMvINz+zy5MNhjsm}WT#C;G6flKkn3fj zNm!DjX}gK7zVa;+`vXc+ABTsKJRrU33zLce z`DzgN#_IW`@Glrc3wc%UH1OU}J8H?qWn>)U?b*>dVCD6YfHZVRN_5B=zviDf#{cGl zR7+V(fDOB&;QOoLalEQ8GrJs#?u`!9|C9kAJYeC^k3MvhfKkUATF)~d0L=Vyzx`t= z9Fh2bd!YmUqIpaGV?SI|32$)A^zgx%DOlqx(6^=XgL5Uqu&7KXIfJYNF{G;F@Zu*R zsTW>paU}s``^ReygzpqyYcrrtQn_;tM|wwhGT7?@skeg zROmgDcJbLALaLVnI{#J|u;~cF^l0z0t1NRKCXx*-WjpapVm8xRZTyNE-sCUg5uyQ8 z3x44XB;Uosi|qziDT$ujtkUC>kM!4Pg|>^Fy-ZN$LK$DoM^js zuR7aIBz5)jp6Um0LAo=Uq-_9#4%OTxZ~`e`25aZqw6&~@%aI4W)8$zw#r<-?6TzXv z^YRx99_YK{7#H5aj{C^$A5=DWvJ#bvKFow|qI^qtZ=@syyTbx<4P9l@sO@Hk!E)_g z%0gFFs5RQm)OGd^IdC7{KjbCUImWHjx`$PTDby+ONm_n_$c!7#Z$cPqq(Z7au5BHc zJ1j|?fBOt}s{8Rh^vS~zRz2x%&x?(XqV*e1}9aKzqDb1PrF-Lg*?s;5>knJp67VPo*7&4pR9e_zmy_whG0|28sdhXlgS z9;941nW?3kIO$y1O1fN%Hk18e$>q{5bJzN_e8yfeiyz71#3Pt?ilp8a^g&U7nq5moEs}MDac{dbJj$Sk3y6vMllA_m6XR+&f|D zS8UVHxTiiP&bY1HOU~tl6-)wx_q+8+uIIzWkD^8;4GG}xPSRKpX4^S0+|TvtCJ52^ z>_zQN#5064C@C9d-1&i)f?TMB2KK~a{5^ewcak0L*S5M zSoe;KYOr_@GJoX!8c147W0D||gu3b{Qssn}br1tJsp4zS$Ylv=05@_|TZ6RmBP0}k z;Gw2-5gZU0tx45^3+SF0?EkrU++Xisf~WhHAG=jh{u;V^|NC0!5yJ?UXc9Yg=^-;7 zkz4|hO&xy#rCK-6@}o2a3$Z;GpLN4$j>!oHW3|DWhotqL5}O?m-UD55;L#olR%C}M z{?%VX=)VMp11Uefc@PO*nRvUoGX<9w9pWFkJ>40O)HYF31oIm0baHAPY65MS4Tf4= zmTa|=#6m9q%EaFp*?gz?nwBX}$hl5L>oocw=p-JXj#4Zk8;|Vp%xgg;Qz8`aQC{bU zV}_8vGwldvC1~Z?W-N@Io@;Qj_qk7@7UO%$1K0GFkEJoq%eYldD+`sLmT4o~O}a;r zP53Wb{!Uvwv~w^@oHC$31`2Mqi7cJ`;VgnZzGC83F@*c}nSs?s16)G#_*z+s#)wu9OdC_LR;$_gG9A3Q6(=Tig_deH-dY7i8cz9NBjd#F-8mL8@ zvG%QyvG-(=f_E@k{Zm^s6rK=5Mx$JQ%9ZLq@9TT#71rk}0}B)WPbbU;GDK3wkXV1m zpj;qlDxwV<rCm?mJ@t|e(SQjtQG>&bz|18QUAVw zVvSC#I$;8f2%eCWYdQiQPv^1ly9KXd@%%JemQI%R{_{3Fh5uqRt##wxkosA712^FY zrzlWn$O4+a#W8?MhOG!NzrZYtC^Fi9%gm?`|7fO>j)QgIh%rElTDz!daXZB5db$h5@05fpbXn!SG=59 zB!DQ~K6mNl#y4wHFZH5W;^-3yd}?PR#mZps7+a%t+&@l8i9AyGOqrOpwl5``E!{Gu z89j{Vqj^9fL!bF2kjI^~^L}H@D4;@?z9m_R7IvODD1-$5vzqPtg72U3|{B-dvOf=ZKcpyn8soIc!^~UU=vdU+#yw$cZCO4 zB$3SM(^VV7Cq$6X=A!tKwNrNc9VmvBt+#@ahF&!bE)_9Av4U6?85pFDa@g#0fgJ2r zCy582n(|NaH}e)-oO zCttutAgx`bq66dpE-EJ-(`xkQ%$F{nRLmwH#6&PwDr_o|oXl1Q*QL z-A`&{g?J)GkRhZCb51@-FH&f|ziR}kH#$kGj%bq&>$>5NPTFa2q&v0N?@;Htx0E;T zGW1>3PR@v3vPi?Y3e2zTqRhSAP%Vv;#-5i*wWCmDs4r*n)E;<#c}z#7%}?Ohss<>5 z*5L)Vf?>l8tSia2LdsMS)cI`Lo$az#r(G|MW>We7O^J0w%0oOMg50ugQun5`0oKDAkV%%6GNl4w01sJ{CgaGDR}i$fy1k;R(Z2qM5yzp z4i}Ermlah%sl>rs!3xM-gVTGzdks{y1{4f_Dv>Q?{61dQSjIdo@E@!FFo{P*l#+Hu z=>G4=74aa?OA%KlUXK@@b*cUv&!+_KD{qWgulo@7sRcJtkVvPOBKle_R@nMPZ+uV| zN{&-nNEfsxLcPvMHoxURHjEhmV5G!t-$2j25P24 zeUa^^fm3roGb64YPkO|iVLS=NGhMX^wxG&9eitchGDG5rWD#hbB>l>>uDg|IfWN%Q!W^nMBJg~Du=M#90TE23B? 
zdhdpn32r2k=kYsH-p;?e3+F)7Vdaumwk~6l>FJWoVkqAy9wv7Ub&8)D4GR01=`#K7 zg2u75Y&WFvqYHng7P{|JBwVrxcf$DtqWn4qY@1kxVeK_l8$)V~Ay})R`z?35@oIOt zl%H-> z9YJY_47td?hBtX)JBM4r0(V3HeOsG0uvr0XO;=IF|rAWV-x~J0_wv zbW2Zucd>?~GWpIw*Wi}Jz^}#!-%tLH>pXQzjVvFL2>sifAYiz&=R%eGqqJhlKn%3W ztyB%G&;Nc65yJh}C?0?2UaUwG?kV4+>N5ZR30wWlHtj?uU8Q|^E;67@^S#X^rPlL0 z3>{jY{aI&ey>ip|IiHT90_ip_Ol*6r-Da0@AYP##m3hwd+|k$CD-w$25T&GQf$vhT zYf`>fkC{rJ2We4OQ<6t@ZvxMEqwhAdxpMsg9qt8h;5`nrbNrL3(xh?;m=gEdib4WA zRiV6*o5~EHhBg`Qblt3Bkcf9%`cYi;E?$hr+_Td)tb$xCb#F{gK>fYEc81HzBi(zi zlLYD5+K>}z-Fs?B3p(zw#ll+BuUVi1jJ?$>f0_-KkAO`SH7`XwT+Fz&IQ{%Dovmzt z85SLC4DM^*J3J(Q8FhYo*v6b$*`cp3}2O$0r`=dCnIhSn8h|ujP}ch z9LFC6kA{soS|?r@`E&-)3>Pr+9wi+*9)GvlN$+__HtlFeuO|Ncvmp%wxzh5!ma*O0 z6A>GSI~(u_^tJsHJA{xp{_=Hc;LZS(U5cyjomY^5KhggP4RkzRUM|QAILg#$4Pn?+ zy-stzmUBJZ9nnBc!rA@_!!QqMB_!vgbUSM5rI?XltJ3CrU+U67+gH@IbO_5%n8By; z*IaaJ>@1hx2`2ebhka_=;(s?~d-PX~JVWj!A`-w2$TFrC^-6#D_NU-!~M6rp#86z+ctGZ%{hz8a}!3}VJOlb@5I+W+#o zW8s1iQVCIg+PMQ?R*TDQ zQq_Dq*c16c-gx^IP+I=@YAfoq?QxFoU8)$6R)B&ygUb3ZUx?O8T+?RVebogd4l5j` z?dG?FyW8B_&ba5!1`zzbEq{avtZU7Ov2xr`Zdw=d+|249<6eVe4N)h@LFi%Iumb%o z&Qt^8T9yBF3zQJ9%!#*f~v($C_v-6?aeu56{XwcZq(s2z&qS}D8mN2w6 zS626mE%mX20n&boe*F&Du@^`{E7{fe)*>E5Asn(ZlS--w1a2DZpXN67tU3YHX z$_^ebUd0nj<>{^AXw-s|vDMJC3uhx9N{A{sZ^^D}RF zwaRE)?brzZ*837;G*Q(O2*|tMqE6cw&E;_ygS0!*C6>pc?}pI}7%^2T$E?F;uD2rv zneBDGiefIU2k;RU)@#xbf;C|h~AKj}E)`zDid= z_*^yAOv&F;aO4Ny1H2W#SKW*UNexF0C{D& z-fFg~5$4k!uXB1ZAW&r!{$t&jSkgLYBq8ecnoWNHMJ^7uxV1r=Nc!4;w_#en_$qDY zHKE}C-w@(b%zStB6=Jki(RW~GLCSjBavk@#0ES~2j)j6%E;DC z!5G4u2=MKH<$DY0%{do^M(`(S15o!hpR_A^?`1y$8CO&Kg zM_$;zR&yIrtoYLtKqYE~2(08G`2gVo`8d{L9o%i(XK(()Z5cwsl&;F?Plrj{_q3k1 zs7!#({S|Y~@LY@X3-HY2e)g#Dghy6TVP`^33}fG9PaTxIe#WUAXl92ESnLDp|4_Bv zuFliB+uWBX6MUiE0IGG`5!wGr!hWO)C>H(G z(z4R`^WENGu$whh^U#mSNtpZ3x-if>jIzatmB^6|OoG;F(sVhkm)7KSZVa!t1#w3^ zE-o@5=6wpp4O9s_8`jW&L9CYI(<7_=pZUHZC++9@NyD3R&)23HK0z%2(UbUl{WuMo zw}-zl=~^b{^bYPj2c~AgAT!eVVRp>p`s9!L&_ON)=icao^X1cfOnkY+`CSqxKhnJn zRDMOox>{r#XpT+zRDp)&(>+qnA%)%5=gRYM5*=F}>@aLw>PN?Qsrt$}FODmg!T>Zf zeE%?eZ+ztL!Ia4c2=)&?G=K7QesUJF#{(BQ9pFJRSKme)Fow;bAX$x-w2|J^qc+%V z@SdJ2z0XYl$Z2YLOvC@0AL%}8qB4IxZQjG6B}T}S+yey}i(<}JXdXn`9{ zOO(*F^e5*{E;#v>j@o=Wzx?TO<2p+8b`KEasp&Y0hdk0Y;}Sy)DnkM)TNOW$_^JDk z!oV|xOh@74Ss_`ZJ&|ZC_35!*?-Cz$+&JeY-Fq-c*zrQD+)opFj)5-y@#K<3o7;*b zolwr!KK%PfjOEE9|7qcy`z5FSk9%(-C>!)q_QWCJ+JS9n1oR*ZezE+i2!laG<)pP; z4JjFm27Q*I?8MX(4Gv%EYg{%XF}{Kt8`6W{_;k{*F$vLt2!l!vK}8&UFC?g{R9W&r z^cz@GgTh9}X>q@-#>O@r>S6{EA4#DfZYda0kDShna*kXK9s1%s06koCj3{rQCNs>; zf!IAR4^@}F2w35zy{>n$uqN6b<%uKysicoNE>Z6k&Nad++PrSA@6kZ!H}EX2KvI>` zUeape9wQ*|GCxZhA?4>*>VxT65o(*T>~P5KKwoui66XR*WXIpHRM$gKGDjKl-@C?C zMca#@h`%+~FWclcz(29|y^~&&Enk1O{e9kQJ+FJWWlDqkocp$Ew(r#APE!kea<3@F zJ6%L&!(WJNf@)?BzOB9%+CPx0z{(|>rm@v@M=$OCP%NsYdC9?5lzfWHi)N~r7)qzF z7E53}$)!l@k{TwYw=wMeNv7x`$h!wC)`IOLDqF9#!0PU08jU6Bsh&5iJJ=Q3IVTpv zb_ekLd4)lyX3dgESQm^>r8eiCBV$EM{Q1t3>Kf1o;#80v_CgZOF!;Se75cNE4-$}1 z2`Rb?5vn)d)n79X9Q1Fo|JW#h(^OIgG#2s4Wj3VX$K>T7>T4qCYAKFp=Fh{*h36E> zAx(E=UIM93C$}IrJ5r|4I9i9KdBb1D{YiIy=-uD_T=_tfPcibpbx9R^T?qM?v z@mtAI>WrqY)nJ@xw^pQ2h<^E2ERYgzKt;=udTNM_gKvk3 zwsOr|%Ys0ueP#(QOAC<=th8nis_}kcA&!+(P|>faWJONU=U@$ubWQYI&~ljAe209^iw*!-58kZSRh?eUFdZsbm+OWxCV^iACN%o%z4Eaz@W zF2(zKq_7hlw)4(XhUU!a8iBPTK@xR^S_g~5E%ncAR^7VEdTpGE@$+dSTcNUM-CL`~B6E&u;L~EPMB~pqWfCc7ew(d# z07RZ7o5Xu8pCI^d!`uut!(phV1UZom@USdaA31rQc7b`I>0t6C>dS_NevVyC_vZGD z`-&elV6#;`#hO98RNp})C%AYUOgq|TV<}%Px&ZhTh;@$T>d-y zCfppj)rz#xa&%2eVuB;$!UBJ<*SyJB& zrG~6O-0DRH_a3FU+MVbbefypFtzPfp)sP_N>35k52|LTdVjo_Lz%7#`?2xIj(}nLW zCDGdTfA%Xu?j7pb 
zo5I~zw_jhS_sFwKiyb3ogQ+Uu6y&V!eW-)#`%XlfZ+5A2IX-*0ES#6oF(74>pFmwJ z_RR3Sq(eYg#AaMfpuK(%{zOuO;mTY8dc*``7BoKm$wS+(wwv+#6L;58mo|()G=pvl z6XZm2f*}C~dlDN7Ft5=Q`o1-I9zthFHNRpwBxP0t!N)%zF~4!6?g}B-jXwyrhFGtb ztz&TT+%E=;?|_A4Wn8rQHJ6Woz^nN0*5s8GdEPvDJa_rD!BDL5o>IotqQ^pC#c$9arxvM9a1Swi#y559lFoMdIrT*U(UnqH^4I<^ZH<qfgCSm6JjK2GLe@cz?H^ff(jy*&?@#U0GFRnmcvkI^) z+P3$206BKYAksEfcJ`pIn}ML0U|5WBual61=^E|Az%{|0B$mW~WWS=T?anC|h5hSS z@=7q{3S7c0t?9z--x-m*4di9nl0c|y8KOZkk(~|ih6mhEbR~Xs(phqY`OZUN5;4+LbS~Gi94TJVuwj!&PlIUKd=PQjMEG@L&(UM$jgs1_&*|6luPy&R&#T=kn;p^3LWTZYx~K6A`S&auclncxiSY z_F=rSxEpzWXyxp`U(*V$-lJrl-KL!lAQe#P?|sCPz3pVTKrsLGIuSdKQjx$~)yr55Lf@;hKKSdFV&G!HC)8+*ywi2Hm{B2?#yB zV1^;T&2u|1!B@E=5%e_+6t=$em_~fliZg0?kV4SA*oi!82=SlyuJUd<8?ss7g))@L zQVE>#b-#fJr(DcgPkLT*Xv+#|c!qG@hu|1bI_E{;cyQBVj2kGiL4Lx;n+xMbh$>Ly zZxNTKF&Pj3u(!tkMIV(*00%yruSJ9K1$SxZ6f+e3@a(N+Yeq&?AzEWnvGX-r+2CY* zJGH~bL`Q5;Yu7PXu5q7pp=Q>$%1;>-tR(AW1zDM|1K?D_x}_~0e;CS(!yq!2>q_P6 zD!bh2F7^JH+v52X;L-ec|xDCNf zqub+Yw6m(!Zmv)~k+I{&Twye)_ft&>PD|IcJ4|~tONP7P#qrRUUcuULI^l?787WCE zA`p;bnkTXLuY%3=?%fD+bPe6&n(NnasOh#|Qq?I&RLZFvK)hI;aw*D5Tjn zi;z<%7Ok0?uOkiAy{D6s-+@!g_9sjkeF=7QBqDt1yhG%CE%lK9SV;BlWh$^50UGu^ zAs*9?s*9B|{^GsAaR}1Ef69E8?^r-}fpc|GgcA7y+g*qG~N^ob>dpcrYY^LM{y($X#aDPPe0xkL!ng%q{)aIc`C1M$tN^yyUR>nD$| z&f=GOry0I87?#$xm2RVSjd)g;oUzg6WxDUP^8)gOiUDTVpn<2czTN$rip#+?{U$ zOWhZa@d};u^D^Z%qeZR>A2#T#b_PIi)QYYg3Uvu?<(=9s2>EaWurZVsGH=m{dZe2*)LADTC&8a1;Ke7hpw!S1Kym=X0dnkJSLsZJ| zcP;frX=0^9I=@W(19GanBnH;SfDbBK1Q|lJE&|V9ffL)`)qihx_Vp;!x4k)m$Ezd-gZhBoAN1aZVAdIvBr;}WVLUy^XRKcj zd*jxP8(&zSU-u35mAV-EPP^Uk`y-vqN{;PK8C6wgNxmmBk}crpAFto~a(*lRFla1z z=!C54its_{g;LUi3t;GCL!VkOwq_C>f<6w+syVW0(wpwJ*6gW1=M^r0=62Ln#n;RT zVT+RmS-{u9#=i-Nf}u%8%J*t6h+FzL-CZ=(+g&@jXNhOQbdc6oj6IYGZt*Z~io*KgzPKmjatz(L?hau!Cw@ zty#V<-x!q}*u~_&(0EX8+>?4hdPXnXk#@~)F!D*AcoS93;&V9Yr zGpNY7)O$Tw_hOsv?Q1s4lMiTtR_r+k>9zA$FW3>N9YE$8nZ)ko)jg? 
zb9N02IsDt3Ztr@~darm0e?vW%ni!wMZ6tTF!F2b2(^rx)f@M+cz9B1cMa)iiqOIgN zmk8(Dz}o_5X#14%^|8o{+dl3Xo5vwnpraDp5w3<}iWO_oT7CFcNtgf(f&zb*COgwb zdkkIr4!I!iF$uO`c!U85pN%Pp3IiHZ$hXrj(t^qQeJ)X(_vgU_ppcyy#qCOzondOM=Kbc!v!48I+nyCG+t08Fjcjp2>LD){GKa8(U(7m*Z0GU;#n(Czi=&k&iYjahUP%j43L~Ol z2T*}-o&k^keA;(vk7`lcSu)Evt@9wD_z^`VQP)z=PnlCHQeTPvnRRJ)hd~3%-G!Y| z=8Z9KXS+4vzZW|Ho7Vvzz{`TlDOkJJrTYgqZjavm5bGryW3HIZ;db>2q(!hQBkbFq7{jKhb^9;%rXwt<%Ew%X2Ovp{T$+k9n}^GXlyYtnLVk%Xnxj!G-N?AxJmG3z;|!5W99ZUYO4Ya%SuJELDHJA=C`0t)y>0FQ$i}j&M{s1wyPaf_!8DbIU0wbFcQvFLjlGej={D@n))jX;&&(O`Y9$th+rJ=!VF<-$K#|Y9;8a$ z{8VvM$mI1241+sq0sLGlNiMZ?c4vL|UYz&81#W>b^JsuCx`hJHRI@*dVKky}?3SU8 zCA-TpBlMkdZZFCgE%A(r9d-M5V&cK(}5jE6OmvlZ{}Irbijh&vzj1vif6Jo z8OS?ZjN>r)GvkuJ{?NTVc4P?=kG<;!{d~|Moam}iyRUh)DlRi?a#p|6ck5Q#?p*6m zEeuJR*jRY?JKJ+{LrBGq6b)-!?5cu$vB#eSnhmXd4j9CRBU0O}2Fjt3{X4qdo@TTKQodBZ`$F0`gT&R&Z&QxB-_$!in!fRD{ zmG8b4QY1&0>*tR9*d>@T2J82hMYl=e79Z)a6Ofx#DuG#(J#)4oz1UO|zj}ma0sd*Z zgmggFl}8}PxQi@)9Rr+4yo z>YWuaTB3iEn;?x(hff&MljlM%QZDV7<&485m!=!O7^LWVrXG%`kAV={el4ti>B@n% zUk!kK?#({4oXcONo-asDR0mesgcU`DP7svwHL1qTS@5h$UepVvY@-`chNfGcyW^?R zez|n02cy_}{ZTvpkqNAa-_Sg_;=^lD$f(T>E0o$jegnA@s(+jv2g-MtJJ%lRdlo?5 zOuG-~FOGRHKNVZ_E%lA(o0~%YODUCMZdVYS!nr!|jO5>8h%QGtvr zUVGtY8GOIAF)($3W>t4o+Jt*3!0L*(j|<#cJMnk!9Uq8M+F_^i0Gu0i4Ft8}4*@K% zNUu`f1e;IaUV-aVuHqwovDBX3@Ztt?-bWNvEz;;aeL!Gk%9I~T%~(kw(K6vBWz&zp zon@idq}P7&A~$mAaBc>o>Hz5Gocs4QA0X)iBidz46MdLi(t*Msq z_8c2+wMwkiX(}qb;`d_*CQeIbiqQ)e-IHLla?-1^4doBBeE0efvkkLr8YUvK!#Y9S z=UH(E6xMpm=J(r2yn{tgD=9hjrlIfJ?X5PCw$9Uf0nu0AoR4{x@Uk6oX$kSV4?9&O z6f(o`3O2YEuSN<2%2<{MmT)1BqJe}d3V5$a7YGcV`+B<7U?LIx_tH?OZ_ z2muKOS^jhPwNI2?hjD{}v4J9_+{A*?h3d7hSWvEGE>h>KC;~!EZaUr_gkcjM{m1L@ zj?9*{HkTjoK|=Z87}sfzt@)(fvA}*)jy>Z+hw3ls{zVfA@7hlDhP(fs@J8f@tUae_ zyfrH8YF*EH|Ap;XN%B9bN=4%5{Z2^ZLXUj zwX*G{ebR!P+yy6>T~qrRNpeT26q#{th>W*TWHj#!M#=jZ-_er;;5QL-Ctb@Xu4Ty8 zxEl*LcYq@KGlMV}f;ETX0K-?1Vf zz*XBo|A4|EnET!x_Xw1a^U^5~X7O~AKTb%kIyhxb$Q4Q;b*VVWH7@HQ8#gQwKz!7v zZSgMv;_i2Fpou0oWrE!x68?_q);*-P)?Ye&cQ$NQ4J&|s8uFmCGGS>GHY~e*9CD~) zd@$?u_}c7)Gsa*_-=w**_UA?0LQX zR%e*9)R|UZcZMCa@lj_Mk#m?!>>timR-D=T?DjaEcD&X47fZMDCD~hheW9 zB=&>9_TN8$X>TyZ$otRed!KJyEw033zyj$p}+$1r40)F82`@y z(R9{-O|}mk77%$PM7o)Xh=_n7U6Yaq>1K3y_e8p-n@M+?!y!Saw9^%@Z4S|nLW4<_e78KhS)2JR zOMAw)tIS*xuRPa_#TU6N6S>VkGs(^!=pJ5UB@MlPBM0ya-_M6A>o_xbKn%NsHD}lH z9$uxtC%x;~OEFd(M`I{Wps?3@_uYii%6?b#!CLG7Cp)j>dEX6~__zVTA~G3Fd2@et zstV6Cbqb~x=;QoUMO5F^C~0uIjepA^+5ph_)L1eRhnlRDDPzIgW_P@JOG^T;O_4#> zf9_$;HZqz4_TK%}hFm46=7Z~=RKDk;?gmU;ESP-7;?tO0#CT`&c}h%DgGoe?vvKkb z`l#b_4BGH2&6ugdahCryb)atSYnzA@;_C4krTzBodJTluNPbIUVp|aq&z8;mzOg40 zyK0ovMjrD)Xqz3zErMzA`2+TNAI=b1IJ6$$!=WpBAprg`O$n<`Q@604US{w&8;m_$ z)~hZ@s0K}P-eG6++DqSuHKu2<=-l?&I|tJ@2Ewen&0{nkyGf@&G;?Gp@v&F0bC-5s zy02c)DCGp8PzkrY!K^TK{%E^C8Tn!k2$y9SMHhdFrxQrDL>LhBlD_IEAo)W{+Xo)~ zVb_edIIdz?h054iJw&Aa7Z?qH9r{JGk%B>jJvMU`Zt9IbDViGYSJ$!~QhP|f#?Z;*r%_=ll((@f(GT`viB3%WkP z+v*LLd={phNUEh5GCn)Iy+Ab0d6`99_vJbG4Hk`E-3jXjP#188Pi~1_HUvB*BjP5$ zNh<%Axw)d#U;3g;>GaF%7F)a-ec299VT+=#P5-uitXQAAwn8LH_S=XIT4!yW_x{uv zV;~S2n z?`&_{r(xOn6zGIW8|Awob-LVhMvYiFm-A6sbN37lL*_s(Q(UFD!*tuZ|I7pDs7~TE8R#h8CqT7BqX{WPOo>zuBH1 z>jU61CC*~uqLSkz0r;21Xqktk4EpY;TuzsS`FcCf!y&M>0S{j+rdg1uWalrsvJN9H z#bulZ!!i2~*H8V6Oeo#|63EuC4ZBo5@hbG1U9TqA{AmRoo{$=F3qw!Y4RVCtT$Zn% zpIIuizyEdpt@J;Ln4KV1-Wycr;S(p9yNSN@Jq8i)lcoKzlArr-Tq=;bR24??U+ydK zTSE_?L#_x{EpOviFiIYY7H5V_X$bOSGQmLnPd|IM!rdzt;T{4T(t3kcU00PEin6Wc@`utgTZ=<8rPRAs0g`WDOddwXBDws^e>pt?TSkQ z%ly-Gm6A#p8H*Sf)9mB6I0G*e4u}lHa&Qik%w5~tOXVyA}&9TN2W1-ud z8V7)a=MR_rj3P)pDA>buT-)z;yL8-zm*kZx3vK_$s zzdWZx4aShnJi zPr-3j;Iva`y(Pl&R;LR~_F)|x 
z(S^E}<}ZT-j^(Sd!#f>FCiaDD;7#EmgnQW*Ac@}I1_!J;I(La~cdd(Ze_J3VP(8a0 zzp%sR)Uz|3K3D-l!z+tVbZ1PO)&~csJ7mv-@}<)G1&|2Hino*0cy|bB;`3aOLnEXLR)SaO7qH#67 zeKCvQ_@n=Dn(|7vfOSGjkW}ecxz{}`t!iu}{dRvbKjU&WaxbVhzIJ?@Od(jH?!$x5 zGI!TE#-Id9f{TUn*;d6pwz;h8Ye%-&Pcfl$|Ap1}D534h{(87t#9jzomvZ&LE^>n~ zb9VWDn$(K=EHO8Cm$v&asQ>Px(yyJ+a0-fB=B>r3iQKM_R_H>-NcA_uStjlkg`7Wd zAk}YtZ@>+lDm7fTL+}F5nNvLvnsE-vUlm3?qX;6cmtYd8l9&#!W0ULJp4RB!mRhIe zG3RZwulj8cM7oY5&Bd!Qb36h2f4tw8D3znTaFrO>njJ@LG$!aY=1UqX0@*v*JGh_% zaS8Vg)!BjB#_k|OE|!b?q_)Z`IROM^%g3xK9UAC?c>u{bi;qgW34~sraDjr%-o2+k zvee{s7gB?9Sx86Yn21IKUJZb{6XS`iMI;HJ@#c%I|Jswf@5aOgMQ`UfUT2;%!J_%-hX0oM zemXAD!4Kd95~8q947F?(*YY-7D+>nb7eYL2XDhye2I4Lke@l(-88%++t@ab>)G+oq z&bRR}=3i!)8gIJht)u$%Sm0+|Ea(phKo-Qo(|be9S$ulWLB>3kync9sIPa(5x^!wf zV4=U8&d)3xO8KU>paf`$K7~B@t3Q&1F0DB8u1_%Uq}ZOk28cviM83qLeu6iQ4NfWf zl~+I9X^_{^TR0TCzH#0}1>TzV(C-MEZ4pw`<}mE^TFSZ^3pv^RZE-wt;S~0T$mFN* zPV(fj{kLp0Y3xCfhp*%IYcQ^BcNXF zV)D^+++lR35erK56w^!WfffXgGomKf=YhH@Jv2e(n3OHqyfLud1qaow@)JY?O>*FU zs$P$5a@oG~cKY=s@ALF`x6W)`Wmth8sjtPehcaO?h_Ai}g{cZR3qOv`??Y}9o>`#v z8Ur6m{<^~RC{CAmy4q&zZh3$1&$cLHw5tso>`((72Hxi@cY8T3bEDbxNsmJr^~&cj zPKuXsD4%qQ$s(@z7u`jtv8BnG+FPK=X|R; zei?+y{>pM|Igc!l?Ihp5XYKNgx`hTJ=LZg73!gB)WlmbM0?{bP+}2TYDrL{Dqa_9t zuqJMor+f${^z>#aDIS*v^}++5gXd@F?@*V||A8LOJ4;WWm&4tdqV~Fk;iT`kmoho% zMf<(h@O!v~icAS6zY0Eq^bN%MAl^E+I_ANt@YNbRObZc)KUA~jNTH(ZGaBQ&jv|_u zhNLUBaMFog{pMXPBBOm3P?Ov6Q6A2(trFy4z*;Yb!2ZRza7KpwJXj&oA4po)qiH6K zKfi0OeVZdL3fg6wz3qW3cc6RL&&;nuDmN*MYW&js94Qz>hJaC|k-mXn(m4_t_Q~(k zU!iPR%fBa)Pn!^CmkYDk^Oq(GE;#Sff225JmsC(ie0}RAR=EqcAB?}L6nJg55}#8L z`5Oj!A>pej8}oLxf52RJFqgX`4a_+hGymAQL;pkW{(K5PHC(5ob3hBFLwIbYosVQ7 zUv;1D8Xmu|UuCTkZw=i4J7Cfj-oAL&j^chno84rC9IiF|`uAJA#*Y+a+jFWo|A$mP z#eEJ9EN?SS``It+T)iQgtl~FcCcEZWf6R1(_>jKwgsm;8YlH#Rcbh6AmiCF{QK!w2 zqX5zr^A9-)b6SqEU+7T1V!C27%6>>0%;7<{;TikGi6@QjC#mMP z(o-*fO4nSJum3dmoV*o0{Yet|jPRTxNE)LwMppM5Pz4GIxV+^MQ56Ckde+{?o^LJ8 zTQ5njr|8Ugh0*7lm*1!kf>woGPO%GKAwqljanKp>kHG~7?`tS-0Yg?_^(Z%>=9ubh z_%e@tn-b=8G#s%GMplY%vgWH^0B! zq;D`L&MJA84ZA0qoB<-ZxN#I}KEG%>Vej?kxP5Z*tGjfo%3sd-9ofar{xyU{I(rUz zPH1n0%rG>eOJtJ8r=r=>cEbpZGqu=+J1}NjClD zH%`yHepl(GmfZG5w*SziO06ly|q;N#^fi@!(353+^Uy*!z!b! zXs?J9j*LB(=kqPwVZR=(7*>2MRjAkB$l8nFH@x5wqU!)ts7@CSj_pGOj>S&6V;TN} z_mb|qKQmXWX;0AoEQ^YfHqH^p%V9=%c-Bdp5Vf9Pv!_Mx-pg$|`6IyE%?@*NsqcmYDM$T_&#&?`E@bHp z&FxqA#_Q79Lq$INxCcFMittu=HkQb#b>k5PtUEV54xUlh&BeH#SJyIe#M?60@;&VW zT8dIHYP*Asu>h^*cK<>b91<9ZrvBsWgJETFxYz$>>%~JKO{ed3fS9*mo-Z3L3v&@Xf#rm#^GD1u#K1gIB! zs%gG+6(FK*xGykosG9~6EYhE&50E$P&{cJ)7qbug15%X(km4+o250i$!=V->q&J26j0Cpo|P^{(wD@>mH}O)n+{*P zox}MWhvmq2!;M3OYLNVcVx<&9WV~P{$0%Q}Ju`o-1^ct7 zoR}LjGUvb(nYWgjyxVk?xWj20-qE1LZwobA1TXmIrh-U76runzNTu14o>ouv4;Fj9 z$Go26GTukwqTONq+1HgOySHSX;va)tgHXw{j(Kl1N4!!w{+zGYXr)(ZmysY~jz}+? 
z`6F1K5r)V5&vy!Y-=L2On8Nuw&&_JX@Exc3kr1sk%`ZI5YZH!pRziXc8M-!#L|Shl z{IBmfD~VP4*lwL^Ib_uPVqt8~Yj8dvsc>Q>d~}>6UQ@q+3HQy!b=1CUVM4>c8B!UM zEp$?IPd-VnKbYx#v)(nT%MMqCW6(zJUYJ2o=$6dFm|v_&2p;eCUO~plNHijjHt4A6 zm@Rx+`oDVSJi)fd_cftf5$x)I*TD3SrystPtZz}v^UI(u6`?+}NhZU!h zZq}^@4qL4{8^gCi-?6tr0ht^{jo#50=|p*l`IhAMvkmYL3ov=VH9*Dcv54Yb{-(w0E92i@oLEqtE{c`Lao-`8h={- z=*L)o%sV5p$=qcn*c;Jd^tQ&5=gPI$ms@-5tVaLe0-^VDozKLK8Y^M##+ZiL+x~!r zuCemnR;PaKb~*toB~7qXZpbNOo=>!Kk50fy>yPJOhczg8OuiS4c7|@t(?d7;dSwH} zIHO&O&xZX$IU?@U-T9q9-wAD^fFCw%ed6usa220jQ-h}?JIRfIpV)`DhL83$c3eGBzgor0Jo5Bf$2+B7c5m}WK;RoeY}Q>y%+eJb3PSoY^l9L%MnVt(*h-5UR&binq) z`T=0^1ubYNDkxpXO;3^+oy_=*EW9t0v4s}%CjA7;mL~opPabo}i1{+A^|J+x{?T8n z@d+V)5^SP`l8`W*>uz3X%iwO46_ol#ptxy^h;ZuG`5;3zoaii#OjavHP8sM@K?w2Z?%r=n48?R8 zwSoft`QHefnK+-ZR)B>w=(I2hmN3zpYVURf`34D6pj ze%TY-BPSJL|JBOFdHZkQzV>t1SAYjJG95B|k4AuhKUu2MB>`TjKL4{!&w?b5189X~ zLb3mvYc(VMk6Jf?GAIpX))vwv3RC(v95Y*AM_G$fxJZT1ZXk`gl=l7#{mx5{r}p`q zk-xUwdZ?>4>|W^XdmWa224-9?zDRvPJ#GFY2}B9f!<2MtJfTF9J;8^k_CM||XeWyo zKmOaUeP7LVr-3@YKV27Yak69^1TAaOK~FnyO9I9Rln)xc*Ts#W#>=Zx^Sdk;>?Q(? zv|RVFOY%n37Untx#|4TAkLQ~$HJn7N;?_^wZmEWo7bw$a8Q#T`W2|&P}h>+qIZtb;vqxj)CToj5y{t~v)A@^aCsb+4xXH2#&E+rYCgSjXoh4lZRNKwT@t_JH1+o=@PBgZ2j-kB zMEKp@I>&BursRo9htA>}b{7N_FyJAuJWc^QFp`IrRC~aJN@xh>#SMH@8T>kEJCZ!s zF}w8-)OWRi?=+J3hpKQ9DFn$!*Xj~Cc69gGpK2w~$lu$QJ>~gI>*)rTR6J;`LFVsY zr7btA?EhRhp5-CBpOiJI%sci<;KTnw@EkYFPTQMaG$iVc?!dm(co|M z9%PU{=mBJ-wQMIZ@v>UfMW4zKd5sU@{)>^~O5Iq;xdK<6l@ss@?=!y(N4&9zWS#!mp8HGC5XL$<7gmci=dH4 zB`2);I4b_tz4bNm8K=m%kYM06aA>mGMwvV4w9)_e{Ib4s0?|AE80bHEtN2;j)k+xo zGTx@7TRW^YBrol|*X+#Os*NZd^*L9I=Rf;!lP`=e$595mGoqL#&7-#>ZdI2RFZwXu zaIF}gYgJEcxLzFZyTzrNov}lMfv@-95}-{yN>b*)rV9hWEFJsO24&~l_%e|VtY^;& zehI1BMK*HN_ab*JQ!EVr{84s)G)4`^<~#X#7$H+WGsRDmT7`f94o6x;3!Va7N}FDH z4T7~(HZ`ae!$n1;>;19adQ^o9((7gZYIUoglCxBN6vati5rq7l!#9 zRgL@181(=R;$qbi8G|xSrrFQ00cVO9|AcQoGlt70JB`K5t*x3~>n(Qf9mk$CCyS%td^tWx{r(n-J59XX?q^P+{%03U`SnEk^{2!ic~WrQ3zL5zYSDn> zYXiO(YYgR7G#nQPM%nlqo`b89kdF*W~L*(z(cbqisRK+-*QmR-{Z|jS_I$M$I1HEV~YWc7acvGiE^p#=q4t01#-ZY=PYdTB(rl z#w_GS`KFqE{4gy|fR}Z<4TvsLcUYY7-R7v;N2~%48VvnD=ITDI(k?@J7Tj|&_H*9H zcJ7FXEl4jIm>N7uq;Y=vddHJb;!8hsfJL+aBqJ=_|IN=BWrMmtTK9WJ=U>X1W;D9u zb69&ft;ncI>gm!4bKtL%$jMC6GN;v^7>YLR?|5@+33zXp1v=)x)RE@WWMG?u-V;7N zNJi<@U9d2Bx0`<%bwp@mhg8xR_QV~wX!8}{9;oQ~J-Lg`%SsXT=B zK}~a+sEDNWpD7T{qpii_9oKn}`!DzVWm#wVnYe0+Tw3!;=*THPL8q4@5BmiOmcJNxjf|9x>A79pIWuF{*3n~d zCY0%yh@`D_G|^B6rTB}gZ_W&wSqOm^N(5k~52OZ`@%(IESbPIOZ@G8FZNyW$B>|Ba z0-@a~BH4)cIZWfXg#vy2|AJ`1$tE8f6P;J(DO?zt39>IIY+w5jB)s`5=}#~Aw9~Dv zymkyb4v$yUg;eGSn*{>-6liR*5tGkENXoVeOra3kw3f=u9wK}PPio?$J%SdptpRwE@m7mLi&0O3BDNZk#M$ zM?&iA#lP9&|slTetFsDCfr|TLQW) zkYVxtIs6U8MS8e_-@$$9A(6iLa9Z|$XWAMEr?Soe-inYa(1n})y4F}X%YQ<2y zEZIDVUyVM#8a*`7-wE$Pf$I&9?mSJllSU4ye?H!qBHW?slR@*1)*sBgO8haLChLB^ zVj|vWq8N=*_xw;)@DJ)wB+)2q|0lfV{ls+hSA4ye55D6cXU~oRx1K8(x7+<%$thjyh3a_YGvh932=mF1y7>W$sM6*$ev3&U}{2oWdj&~{aw&o(0OXMgtAA%kTQ*LAAN3P@N zGJWWCHZvz8{SW%-FFqnv^%=UTwB&~+{Ghj|aDM+K`cQ(i&F&%@9R)?Jl9yhv#|0I4-ZN z-u#2I`>T(LXg0sG{HOkJoW!5$)+B9(%)9w~ZwB#F7OA^smsNg6*~<{A9&p@@_^JMV z@;8bHwBgPHVV`5*kxK|#(_DK!iSz9^?qq%Y#+zyt4e!hDa#3Rw|8VAe#zoia(CRqd zDv4#zYdZzYF%MtA7^{T(9Ng2@h$g(gg$83ru6+q~s;!$rd;%>yv?;0J!yK}5u|pP) zALJjr*GHXs_-3KMdvzwn*kGwK|7ZdnjkWo`h0<4cgqxj)QBpE!(*JJpjsZ`P4#<{u zAwQV3hIXTu!!aVl*t(=VX*?l#=05Ibl^N5Ysa9RTzuN5ybp3ich{hjDlGNhJ*7@Py zyLZ|l$a^iH@q40C!&>8Tf0A}#hQa27O?i*4kt(8Bx6%zSXo#eX0tSA z_|zH-QzSJbP#%SQJA%S6N|4E@!28mK-Rk!OmecBn1WM|2khS}U)dLAJ<2>>GdA+O5 zcOSNT0mv}%wzunr=EtGnOPB;lmUfwGLv2Om_&ybJXPx6W5CFK7?Ug>F$HY;oTvmIF>qvfC_z^{A;a64`_#4BOERAJ5O2j$ 
zbT?VbW0E)RpEhI%$zh(e$;~%o7g4T9qg4WEC}HIs&r@_Q|HI(HHzCvXAxv;Q^dA&d zr|ghG*t{HFjxNBYz@ZnZ_h2|NaazGiI7inY5k~syXxgrECZF?QO{dXHYnzSed2B5i zDA}5;3H-_22r~zEl{eGnYlN%M53vQ&0+zlj^LWZ}3~;3fN_eiL1J6I`asQIt4+X`p z=eFOD?bbbtEPlZ7JkJ-K(qc`@p`_%Ov7W?G_VNG5g&XSI+cdU%UiL5MMUTAu(nGek+LFA z#eL@%O3zg&){O!ATG{z1Fa&7#6zN8~SPJ!0DJmY*%;hPiz_6I+mCq-E#YG>!+Rc?s zA0$USVvm&xs~20PHPBZ>(W=Oa%$Xi6SS1SdeyRB&$p4j3~bTu;V6=B zK&-@NQ|ye<*OXlDkZ(F)Lue?0ONpkMPb)kevA&gK5|mxZUZl$WC#+6!=Uj}XAXbt* zdVIRtw(?NSEBW|IE)bsYML`WY`d7jR^SB!@Ka90b^)IqEpApdoT<#nfR%H0MM3s}D z&)57kXBtK`!SuA1{N7uq-4~~&Bwe#LML0Hf{8T+~*7%?T&CpFfb?zrr$s45B!p{7v zaJ@cgJ)Gx3o z59|gqH%UIg!u2r;@Lf`o0f)II-=a!g#vXp9N~=n2%Ty zvq85Y&bF;(0v{)xOU$c`xN`y2{fd%@cFV0M3;HF6CHfOOM{RqOvDxl0E6hEOTW_`6 zyR^hmS3!gT4M1ncoy%ZU5v#Bj`@7S%^{Q0j=Ow};&SMCYG-nUtYdR^EBC` zlskYo7&Kr!1%jQuT-B|Sz&_QDAOJoz1#DF?q(6cIJWM_or*<{p_s#a#HFah6(D?iD z4seX#T+3s6Mt&PseSORFYb`D>_XXFih7!vkF4aDNKQ=EP=6-!S!T?6bJnYC27Wedk z*VNyxYa=HGi`*q8YRUFZkfCqrX|DOv_$l8xa1Z$ks^HIAw;?L7Un@SBlCR^F-9Hh3 z0xXtp_Jl2M4SXy5T_7HnLZ8`4glf#edI}5~kUk9)uaZVh+nt3GIyz+JKJ(3KEUp~) z;&i)L!|_+&AN{n?-2BMvFpvs8c`$Y{zqZXY)_2ha2seDiBH!eZC!J~Qn-11PGR<8IBNpRQL8c@=mr_aB{n4DZ)gqd_0{S&*#3$U~$uD=- zf?Iwp>N93_>cO@&e|C#L3{(Y*S^nLcr)zI9 z*Wse2s)3r{)vPcE4G-XnhR&dOSOx))u%!6-HNZr7mqezY5eMa!wGS!7+wT)kQ$KBt$xpL)>k6nFT8;aba za)@^hSinPwFCQ@U>#C^g^Hlkxuo!xJsKSev_hv;CW^cMVj*S&_6TZ0Uu#Hi$nesNV z5GF3bm3?g6pZ9qlJ^m;lL&NmCWyFF#(~ZX?txte?Z?0V51@nzSrdpeoR5Vz{bCN{HL7I&6mNo?94V7zk!+;L;hU9}_+d_U9&0R<+iko3kMJh+2>&O90WVn~?7vB#gM zKiiL`y?xk!i&>DG#K1b$0w%@exHS=Yv|g4S zP4F)lS?UqM8kGRr&>$DyBz5x4hG2HLx%zsS<=sS{1-{~KPPPa;E-?E(t~MDd>4lWY zM=Ic#C7O;fiLjRk56-@QsQ6HKa75gaId*fyy;6{ze> zHSSb{?`B@u^o6=oGwPBaCRK;(=)NRDH}a(O|JYA$yjras-BUhYS7q@3!m+`4A%)EI z{9(=ej6ubB8#UV#g75p!f2Y#C(2wtN3;Xk{ zl&R5fPP9+nN)OJtABZIYNB>Mv&sYls+!YOp>D{nF5v`HDJj?2KluD z<_kr02utoov_Z5M-}R_z(zDq-gr3WI(1?F8cv(=Kjjg4umW+Pu_er}cyHvc@K><@| ziR0`tiWT&S97#g02G~E1<`Iqtdv=8R>1mU59MdLU#=khmmSl z%*}F~JsXR1YR=KYyvTeIF2I88Gjx36)`9_dkue+yuJ3N;F{eolI}y_Jy1F z)h@18OtKqy9`!dtOa~#>MEZ6A!FMB?_X)SNarDuJ%j9AJ#Bb{TlkG4 zG4I+Jb|;$hNnmq(+Z>N6hRfqUqO`4l0mBi`DjOivKT!KLdb?IWQ+#7K(yaMAd3>$8 zceoZtkG_w30cf)A+*CQ$Lwt;OU!5utw`RFeMOjpnfyB|5 zWHa@QCXXN$^1!th1HVkM$2^F?=ubFUOWmO%&=I}++s={A-{x|X_~?`!sS)!ZQ%a;x z62c&>6Qg&4p*bf3U^GAQu2=R%h7XVR3BN=Iyf=XjbwC}8_CSyqwf`hF4* z0zK|uDD3#+$%K^MdNo>24hmEk9}v8J8}N9HeQEq2#h1RDK(m_t{LkIhNhU@^D%&2*&-$}ie&Hq}}0#>ZlBhGfqrts-H1t;~m>}h;=c5FtJ z;y>n2*$ij^b^dos0!vb&6?Sn@fF$H|*$NQv2hEE=kng|gi3noV(bJkAH7tIx^;);j zTcUgag-p<{T-Uoe{e27$fK$b2NCkP(6jbfMAKiHJ4~$|z@rvGdHNN^84Q~s|T?y`V z+uoxpnPdPqfoH(;;aTp`D*`J=^;7K{vZq|5vWn60DhZ*{KdN@|9`F9NT9}@Iv{RaE zja)A8*HPy{L$_j)58cJirq)6PYb8he!%*@K#YNM$~qnAF@tN`=*<#Ea;Uux=Q~9 zQ%T)%@9ZTgxorGq_%Pr3)UecXs{F)5`qrn1|NC4aF|iGN2!KA$H|X+Ss8m%w{Rj2i zUXIgXoml;CDSP~)dIGby@>K!VmK6d#dRgmDA1W(pq1Bs;!H~YF9JV>f0PUQdmTaz|4+Ir(r2KRN z+!`iZvO&M7&5p%)OW^jL2jV9wykLu_e7(!FD>8XB5BmPrO*-R-}=O17=wlBkmWeNjBT3Hd_WaP%$;ywH{VK8tU; zuEc6Qy7^SNxxRz!?ITRs?_XJ7Y@Xg?@`CPDnX8OcHl6B5Bcpxda)`J1xRC=Z#9PVo zIL#3&zr_R}=$rCb*0`J)*-KT%N1mFBMe;8h4uc)ipCd{Y6R*V)?z+y#jk@vyQv)0h zFL0N>#3b^*$A7P8o+g6NzJ%wClwp*rJpl3wcZ4Y;YK#pkp=LkRri+bY(1%vzU-=Ir z|Dhsoa!`M<^XmM!mI^s&zTs^e6pxgY)g?u)cU#l3Wd`!43Xye*QU5R%`}AhE1dh+H z&s(QF_YKXe!!M2&gSQBeENR|TZ3!$-d_e*8(>6-cpJ3L|&W)S>k_(l`Jq;1NUdS+s zmxIvmEAV4cIT#Qc!l4-gCc*qb3>Cttuo%&7yJg}*_cQgt$n9EXRs5MDVtXXwWr3ht z{4m3Yy@*f=$@Uw-iyt|?+?;I$e*FsuynHkh|^fHijv;N?3N;8XP` z)e06kZ{<(o%&J7q1}nIl0OAos+xGl|i!NGjaN*@mGBFA?AyRMZ|@VjxV@i`51z@tz9sylLun16$U2n?$d+`XrDg`; zm#H>zF@t#x(1(OkI!{wj`&67;uXY~1#l>y+0?>)ebwT$By?!Y8?7sVx-o+04J7deI zF`s1Zen9t5wtYJ%9x33Aq%z{mNA+wT<_&INv|cl}L2$=T>sdc1Qs 
zqZB2YToAqNE()=mdOd7cX&tRzV&~1f3|ef81Aqs=a&aZV%AM*^V04B1~_jCSZKW&1$qrzZ-NuWEKyVk~tV?h=MJGKnR|jh8-V~#D!oc68*aX$Yth}95^&pJ5xa;4rb*SgjDhq4CO(`oO-BhE- z@uR*6Dr%0tY^D=;AE?(VUnrNKlzykfW<% zwU20+<5-uc6NAork^cP)%vUC$A%6`TwqQ%beyxfyS&tk=!G-h&2@i^KBcqex%7}*@ z^6gPwWh$)(g~ab@)AcD(DqI?zItRDEFn~e+BV}fYkVl2AO8vIrin8SsP{(binty?m zV;`3m$8PHq@ZU~;IYYq)qJmBVOD965U8CXJhs^Hk2D@o;DM!pqhD>VsXxyx~ZvtL% zL?2+!As(wN!oHuF3&_K;aim=ORi&fhw`TGf;R`wxvDr(|jakFfj7Tn&yIk#&l``#B zlLTkRmWlxUO7<%J?pVCZ`^MlZ-3ZaUl0ZLYt~)sqqy31v38klNfsf|QgKqkyPqPBA z0df(mUWd5o$GhkxqNW+~DKt~8G$#;laO+3KPX`@*afX^Uk63@W%m$&)f#R@{l@7xA zq((|E@L9FcRJ+Y;WIHIp{h`Jv{~PMl7rN#4l~yh>RJ*sR*ZBCI=*Npw)4=wc$9sgi z6us;g$Mcz%qu&IoL=O29>DzQ5efTB=5STSyQ+o4;%AbQKn06aO!0X{_^S+@ zx6nMyBr%=X#y(Q%J*ovId2){uUWcAN+mD#Y$6V+;@jZ_ z)?L>J$E#0B-PkJ8&NLyb+IZrnzST+G(pT=ZnR`9jtSEAv6xO!Y}WYB?^D?R~3odbXFlGHa9kD&Tw(#~z_<;l zX<#HJ{ME~_moL`UGpjJBQ6q(+7o%vYjhL5!3q3Yiy3!F~EEY{|h#JQ3f(g1ti?v+( z#C%bvy_hIGUGcs+$@@jmcD&w^#dU5U{%0Y=e5j{ug!IDxTV40=>-u9{&Jty0h?z~1 zN9-HHXGESSjZ8?czFOrvF@1^Xm+_wZ={$D4pHU-Ml4abi#Nb=B4xsDmp1tQ$EXnQfH&xD1qztj7I{-e2KCu5g}Pqc03I%kpgy5_)yjhk-k+a* zH~@^uo?sw@Tp#0OmKR2Kd%zuf9R2BHbhbz?SZVstEnYM&pX)npj0+Y{K{p2VXrF9` zzj{)&9%^rC#M#Nr1e$@KE;sN01w#qmKf3f2`0X6HNG~jf7yv|M|r+cJEsg0U|fTm5dnj>0* z@K^q{Wt6rl_0D{+=C9fIOonu`QVrv6 zKaj32&I;fR4_soHzywEFBqttHA9{e#gtB{&pU=EeGz#1@36DjOiql|3o?|_f$j1Eyvr7jZFuHKm zZ+a!g76hGev|hdCc}j|7bePho=7b5349B-AE1r zMRH0bu@3?gQi_xalaLhY61IVK2m;bEx^XlL%4h@xq@={r9UFNz#`fj+;P)4tC+D2^ zec$i%|NePh(S_fL}I|a!bUjvSc~`lx10ZmDhOhq;14=j#q!!VT1kf1QMH; zkm)#Sb4kqk|WSgGsl-32tOlgU|@yQ^6?JEa@8i}j)xmf7nzq^ z7thTiTJ}tutnmbz*a{ilN$D5`K>`;2y%SYroC)%hsz21);q_pmA;@!v4*BR@>rC~B zZTG!*S@~+I`rL@^#)p`%o5vW)`Fp+LD`Kie>5?P)<rEEd- zIL1o5=Ba}`Bl=`8>ZBTe+COc5a-;-B_>ZZ8+dKXKdt}l?oQo(6#^>P9+JSX;KI)dB zQBvgBk6+?QxmBVd(g=sD_rw7d;fUiM{Z7e0UzH_)G3y9cxVXQ+vIee>`a`uHCO&9z zRqVsJZcAQRbk4c(j~hL}5`{p!3%;mO-%eemeF~?w;yX`qVtDlKnNVdDMWYw#Q-KhC z);17Aa_ZWI*gZWp$#3IVHb;AzG`}@m+TqTIV9q!J&R6iSCQ900MmbO zG#ES8BFPQWbvGry3OhWUCvVk*F{foQ6C3Sv-dD*^-TC{u?})@3zh3kKcXeG%XUsT! 
zg~6O{A*1(hc^Or>2}&B1JD%|NSI~LBVx=RKM_nZyOgDoN0dJS%1BpcnQOmEiPC-aT z^0$ibuf)+oXa9(#N2I{1aF#6jcf^&Ty!SSQU5>KhvBaGBe81kxF4%P;5U|Tv;ig4M zOjPpamB8dtvWcwnr>ZPVymk#YnhHR~VpfNIPsYAU1zYZvZ6YUQXj3Q$2 zMn7Lr-%mE>d>cIBG`I3S?dD75Ky5uS5%ym<{NG-z$wQHRL4(^~L)Zfu`)cvU2{($= zQ%qkTd@_24-gV|@QX?xU0ZCfFn~pdk-qWk#=|NJ-U=LB1&F!fbe!)qBc)&Yx9aH&c zhaKJa-J~r|*{;yD;YnjTJF-+De#iG@7I{uWq1LRqt$8J?vm#Ur77J&G0{I-zCEcDB zQ^~Lb^OnD}3K7r86>b7y*s*>W8bZpK&2W2%l3E@T(9#EWx_g@=x$Z{cxt0FQl_E(ImEtoi^=2vSpalg7g48qxH+W;OcvzruMQd2`rPW=Hs94ZQ(zSm zL1*KmT4t>~X$hm!{g>w=%(n7|#U9n2by_yau_n`O>x)hV{}2~4d{lno`DgGU)h<6N zKEL8-&3^(o=S85QU-^8o>T>R?iwd3UH(adA6nn+`ppk^R^ z6p7P9^vsHyYOBG;%cT0|-@1m4jPol0lN0+H^8lGyfd9OSJ z&qiz_tlhfbDXB?fEm~wr{@rB0dQqetYag}EPi!vGFxg;dV#s$ecrQQ9f1-~L&tdXO z!g^lE|6a^}wkcisga`Be}=lhm21|M1$^lqH|5fj_T&_tUO&dA0+ΜNW zWI3h4cc9NeyWF~B<-p_`v@6vldC)EIeuGznqEbBX_*sdw_vPGXyVp*u1FF{Kju$rM z?RK;EPD5F@y4X#h)NGlZ(m^Z-Q((Bi#ogZXkn*4^87_f1*{F|5clfn8*M1USMCq+&skRi&PaQ=TB6Z9L z-S;nk9v4JKwQVToVfXRN(4{6Ok~<*a%CppCR;R;~7lU$acuNy0OBw!5us}}IFTqMhW`}|r+CF>K6ZSV#i+5zmwxLw>raocTx9x0yW$Mse&gT~q z?OxU_o%#MvR|X&LG{Sbli}-$l}`(Ycw%JOtZrw3CFqkugCx_?Tp~>wohr z^$erDIWkc(3h^@!&oC9~1JAq~|2cV7n7ZR2Zrw+-Lgl;j-&beapcW^kKh^W-2%1lg z>IARyOmbe0^28So{3dEy#AU@0jEBm%%5~ZA5T3qj(=iVO8UY#T%xV>ILCABl9VNCvYxxTY8|9YEY9B(+ylS$9( z;!2lBs@PZLwT-M|#cKpgj(;u1+t!v@*B_RszUNiExbA3YJfD(r@DDX~*9^A05ACsf zd)Y(%H%;d%ub-;`+bPZc*LzxFEOO_@_qdl8;#>UhXV?VbuljBP0+#P>ZM4nh!lr49 z2BQB-ePk|>&ZtjL!uOcj&NDkj8myDK*m>`RlYT)4a*vrpNj3Il#kup#v2t0@DLsfX zndN3Lps7omgt|tO0<`L%c50O1xS3NddSGQ%!92~2p^=xYptt6oS2q4cV`z~g*ZWc8 za9&n{I2y?$?w3rDd#nIq{0Uu46pYu^m^jA~z|RHyca_-ioN6(MSU9BBAha*VoqV7k zE9TX;vx{Vk3Ml)71@lHRd!~Z8i^vsKhL^?427>T;LXzvLSb69N?6h5t;A%Vx`S=jL z7^V7Z_kNmE&$>r+iBQb(cC!ElhhJ>>B%*oq-nJE%G@@39KLK?%^%?0_USE z=u1Q&wn)|Yg5_TDC%uK5P7DQ>W#Ko7=C@Ywt%rD}^1CD*Ddpv-8}UKTbKkQ;!Ao6r z-Np5*Zkw zNJ`}t`6)~?8HO=NR(4e^BL6VEtq+!7B88KGDEd5yTbjukKjwOcH3V%qq!+<#g8nYH z+tU|jZf~b-F)Rt`y>O*@{g&<(qXyH?!0~0;3B5(yn1xSe+QEM5voekLl?RVQRuK8* z$>r%WOe8C=)8T^gqPz2=tanm(wimk+S&M3pnf#n~To?X06x6~xQFz~VKz#Li_OHS35^#Z9ltRAY+o2pyO6S2vW z3Ca9O|5HuaLFqO4?X>nCBR~^2K4Q_S^-C{g6C(FOeq%2p1)A}v-Fv5wC7=+&3v*ei z3V~LlB}B1hOwrsj3_a7CdRQdaj8Jo3-er_mCQM_Z!Q3!1UZTLT3sscJXwVsF=&a8g zrE?YfJBrQN*~`57?a#4`143^%N|pRa6RHp+0;XW}jyd^a;x15b}HQ3wWthPMwhQAb%z)HfywF9g0V-qx1@(BdJ-yOkZB zh_!Kuu*%6j&V<|E8E^5m!*7X+2akh!7a_V99jGdV1pLYj9o5pY8{OqfLuxp~HP?7n zbO8StXMN-?S(BI385SFs(NO91KKDbAuCP4PV>ZBQn2M4W>p61`E~V4e64_Bku09$4 z8O#r_t>lJ&mRuadULg?@XMAyVn)B~^k24XnaF+IO&uD|+OH-#GGGx4j-Fqb^H&(2X zgnmAKI$Cm}&P((#JNfi5XJ;F#1#*ts>v}v(eZ9!-MLe@Wtx*d?z1m4HI1cqVxDoZZ zTi|*19e_eU?qpt(zd^oEwl&JB(W0u*)BK> zI09x{s{J$MgqPFa@15)br(LtH#({d4qh?LAeU>Y_Gsx||mCU=<1@L!V7(|(LF*GU{ z-+6PTnD+;x)oYq8<{wlvd1W}6C%@L8lsnlpXzpATUT-rpy)DsXe-)}oNBC846FxJ* zK#Fw4UwT10gcyq6n+4k9#( z1(bif%``T)*&+28xbAWL;TOX<4m|IRLs5SQP!d*gC{R)iSe41JZi1(e;bQIGPTeaB zM9jRJQ)W0wG2`Ln6KzT%T}`jWA;o|hQ_i2--r-{cN_T+?m<}!RS3BWKHg<~^f-5t= z)D`zK80MKSd^x!*%p;->9<6b6JVXj0mJqA_5`<% z5Vh-9Q*YBUbF5VMFfI#$_?zQCiSoZ#3inEl(-ln((fRgxHtD+1m+)ALffk`P(S9L3 z;|}GMET-F0w`p5n+j06h9G2fnzq{QlQP>z7^v{jJ3d%nV%?P*=|)+8(C=)NR$wjd`tUW zyTtC|SM^I=)g?z}-(D7vE*g*7+*`+Sa}RJj;U{c13`MjTOlKq1rZ<*>i+^b1^v#w7 zA_2UTiAB!7IlYT@t=qhcvX=HbWoGF~jv1|#5NrM3 zZ!Ew#4D%dKA&>YYx)DTSQkNyEii|%ZVK>@WC#>yX-KLLlzq=L>zE#vOeYpyJmv&o# ze_>qk*$Riy8`6~3xWTX`ZiJQmqCWLJP*YljNr7&}{ z2!2K&5m(>Q-oFMBe*3zIOg9|_j{`daoh|uOEr>I2$P)e=gzKP~!tQ142P!DC<-9cJ zvePq+)@9LUzqop{{r$P1*t_%`L~g}h1ND}fu>QrTx0+JOoZcoP-48NcpOSRcy{dQD z0oc|?@Vcui+5gB&0&%cMmdCJd|K>@pMFn+(@E@khp@_pC6tMwHatyZHAA+1UciF?#j|olT?A`q1yAY7?&o>( z@Ew2H0uA4)H%4($7e^&b)RT 
zF&(>la!>5h)D{)r&o8;`swJ_DAX)J>=RHW;Ml@T)sOw9WjU&|ABj*Y(3Y5W$S>G>L zg9!;Zn@&XrIvz~K<V&4J17eGIm@>Dq%ffG**|qcHR`2B&{2MiL87 zc{aZ(8^nuFM~;=PSnjcc+^JuFsM+$SM?d3tS;#)Yl@zk8Y8l@lbPRsK1s`XidJx9W z)T1(QevuahUo?admqC2Ui9hxZ$Vdnoo+`bS*;d(R*;}?k2R}kyfxp7F!E{Cw_O@_k zQ;>@$4Db&`-ZBjMcNr8+KpjNJnh%qa1(%fa20iK7bdA00@QY~qqqn4zo;L+MURndw z(RlkW2C}CM4Rj@M&GBIPIcS5bnKXeuL=(qRq(^>dQRM30PRqWQDK7MTk^cy=NZ_|h z)M!CjDC$1v))Nq1f!IdMMs|=9fKvNDesoB7vn&EVg?7Mcn5w+^%xStmjGHg@pRq_O zs(4NGYso%xOjw?}*@LM+0k(lweLNayH(x44CO|fjgA9`Dl3`#OA46OOCmzp^<@~LT zejR7NXL^BHL0+z6BnD4#piemvYttlRHs%pA%E%_#kb!)Sd>!iyT;9qV5TU67C1$gcO51IbqvNzO}-Tr7S7F)phi!<>D}_lwC>dP{ki{C>DU9fQ($4d0Ab3iH7jb zBgn?sC_x={{(Us?*x%Et6nlUeE5lmc(wR#E;IVs}XNXwznvm*UY0^<1;-ZrSjZJ}U z;Vatu5KF@Cn|HxL_=XBYy~f2a@(qR`i=7-QMnJ)YJvR}re+I#7K@PkT2i?e@zL`df zy^d?D$1#;>-?5>F?-%ca8;e+Aa6&)%-Z7exP2OzU$yo;zJ}b&(*;(O`IC)|VaJO>! zocOgF^|K7P`|YDbMhv~}V1ao_J-B`WcjQj@_RtUm{Ep+a&S~9|*40ITujrAC6Kr$$ zk!>W3Mw&m1cKePl7~R$fxkLmOv*-?VuTGNXN!`B6%9k{AI(Y-G7s?q~Cc=b`;HAUyJzgXFZLsef4Hq(dnIni=%6UjI}pQj}3> z9PEn14KOx}5c5q^A565a*osqyHlp}fR!0(y^b$E8Bu-L9z79llUSh*D(_Hc-X*wXX z$pssuoV_CLYdHa!*IQrSy)sBu>lbDU)xGtcApMz1XBbeB{{HHqTff(>nC2if2Yiov?e%%N&D_V{pAPa zexU^+K#T^rS;Zfz8}y6!0$Nu-nLDJd?ZCEvY>R@a0)K1V&Y`0DtC!OFX`PRxNLrtf zCK}=H40ladlS}1T+ddV_kWMXd?So5g=ke3Iz5m#cU6MDA1l`AFhs&0vr zY}j>YRu_&aO1x;{)BTS=Kj0TT0$=6w3hsZ4GK1X~RMGnAlQ6UM;K}5zb*}p{cIjLvo+c6PC`iMS5 zT>L$cco;1sf4X-G5-{p%AA?w^=l*0_!h8?I{c-BqK=h{Tx03U*SkM)SDNYF+N0yF< z!bOPZq&DO_4Vtu!BoTm!4=}wIGrIy-L+WgO%fk<56U2Wf?o+nc`Pj*0hVzug?OgUN zuBQmv5EfQAK9PNICn^z7+)3qd$c$$kYDivZu&urC=<%mckBplYR-UWlZxr>=iCRV0 zEo-YnPA1J@6Civas#=n3tWl3}ZxcYWR``$>D?^K~-TRlFgE58an${a=fm)lq-9Q9f32W;uO3h13h304S~z5)HAJ~J$Eb}b94@8b*%w_uFS^IZ>GBj?!aBCj1`@ZCe7^iIPFc75KAOS)4O!!Z z`0ypdBya z1dz+_K$zjEO~Yb09kbLRrv!NM07AF-SWudh%9AlY1aigzB2yvG$oOg16;0?dVN@&_ z$oRH2hwm@D@@y~@B1;3w`_}7Du)EVV0jpqO`ua_^h2x3j#6Wutv@O{uoHMbxIh3F6 z@1(AVdHtgC_JgodY1w6gv=>)Gp92bl7oVT1`X8Zo)Wy~EAJJZ4en9R8J@4G0p?&9 zYOSJATQWa{-d7(~+cM2v>BA-guld}OwYbagZz6{(EJ)<(NN>`@^~ARdf``Yj+B zbzAf@J+K4fx)H38-nt%bk023=KySwMLl5@7(g$VE)dr6r>Jv`bzZ&!Kze`qs@s!7S z46@^Mu%aN@{cL{mUgJqqp8BoCtu7eOH$QUp5K~mM^uci6<$m}NSz@)c=KNwAst>s@ z5Ae^(Hr^~Eu}n|i3Q)QKLDKrIT8R1G0Ik&r#Wr+ax^yf*<$h)&v@%ZWE~p_y-q)m_ zmRN?}HmP`Kp0iV^?r*K@Ytn%k{nh_gM_m)j5u#OB=%m|6$vNpbl)2{h5}_+n-o;r= zyupu*oN-Oy1$l>4M_$rHhZdRD8+uLtOZQ_4{1fH!w0syR_@EvkaFi z)mEsB$mzG2MzC%qrGOsMjG(Yk&)?X05C}9_A7LFscN~!)Mkgo{>!^5U#>+*I+r7wX z76i_?&L=_jm0##51mB=Sz#U4G-5pz7C_^krzCqM9% zxpiNb2}j`IKCQL(g6pWuT51~M5ZtkKovFj@hf1)D(S=3DyW<$doYo3?`5b!L-t7;e z_o~njWTbx9v)ewrj_Qhjcx&o>q`IvTlHwNB~5m+MIScXs_^2pXmK4Q=|WZC_F< z(yd|Lop?&z8q-cH^-4udqp`g4Bu-ehUeLL%$M^B9DLqwLZ0)3U&+_JP=sw9rwyGbI z_kQsP^0)|tmmr@*hxW+(svK<4?{16zdvJtG7*#dQcznbT-*v@gDD0#dlQRADR*kb>G??Ls@~Rla z(7E)a(|@>+;x}qzxz$oFq( z`HV#WLUQ~h7n;Z&ASm2LYefm4G)l9M7D_a6xeI-H1`|R_gYmx5(oPr^!FA!GTH%%KM8$ z$RcUgFoy?N1|pxT%oO^S&6pN6-@m_hv-{<$)@Aj+wL0H^{&Dm?-0+GwVEDz>CzwtC zz8o=|G`7JCiX}kYQlVRaFQp=r1+28^eIDjm$t=YD_31;yoYq&@V1GrBehujh5BU{|`2KznX+6x=dGf9XLIa`oMTUaN zXI_lm)l)27d5iOcA9bl0_Tp8IgO`$H6S%}hHy4^6PpHOl&$Uo~ai>x@iUsm6I-b>4 zT9}y&75&LoB5!|=mr~zY+Kur7 z_FQg$lK*wMoo_s9Cg;yF+si6>6RLJ?0W9O&BK{|J0$?m$txuUNyq3CMRXefKP+I3Y zG6l(6>_E|6h@l)z?lV1Az)IA;q|Ax>WI`82+g-pw*YM8z_!dXNbBlZr=+Z>4%6h@E zC+6YNUtr#@6067ih2yCy+m5SL`8g}3LepqIDYa(ZKsVR}E~wDNGZ1l5-BKEM$UtoL zd6v2}Mt`kUQX`xR`g0i!_XW2isk`P-6;yX=$RrSzh}%!mk@eD*De9MKoaC%SM=1dA9E7qNj_SFS<>oJ z=(5N0gQ`;Zo0#PU-G~)VMMN!Gt{1`2p32ur5}Puz*|vr$Zm!YDWyK7tQRB8uXVoI! 
z$*O~1y;-Dr<5PTF`)V^FLjBe^#X7~LCcCkKM<(tf8hTXrRgN0LaOC=1+l99(%N}c} z*Q;LsQjhy2{pMw5;@&;!@+E4L|I!Rl&_h;VuSU3tSdIUKp0`0y=SBl9d|ZV~*6{;~ zNLVKJxE)(;b6hqgGiC5xjFjLQi@IOk({w-Fi}<&7+YTP|1Xne_0MXHH9a%-Tu%Z)H zh@7ZEyrV!O`|6S%2fCb2nvI!rVp5OVgv>bhvj+H)vH{gRTCT~fi6+p%cRNtfX~cbp zS^`7lKti9`v|85Y>yhIN=lM8hc~q;Oiwtf1uyUyrQ}Z(TYB9MVOAv;MwjPc@MzScr zOE4Ta>-X=w>v^tZ^t&u4Yd0nEYdBzk>0FBKoeqAl8Oa`lIZk@ZsKl3>XTgw?kSBrkk))788Gn+Li^Ew)4ntP;Da(te3kOudf%I; z&Rnvg7d_DTJ7DrPr0w}M^a2l$b=&OrobXs{Ii`13wpju-go(FTu-lhi@88NJ%-7u8 z;N&8S*ob0(?L36U7B!4-?q?%%?}&-*+}jngBiRW@9O_sNA@YWRRKwluo3XMi?5r>U z0zvQs@@v}#Nbh^AwW#I9I0d;8a2jxg@xAu$7B}^eQ=_jjqOM=xc_!p{?!Wk z@hMSpqeY|V8*p;(XBTGEP`(e0tuFyD59RCS(Vic3m;y3)SS+^>^g&|A8wPm@ljARm zn4Q@C+?5O9JA1;nhiy^E403}wwuU0bNv`(w2O~DrGEbIuo3TFk5)OD?H7&dHqF#rg zASOx;-(W3Jj)Z7lBUNy2T`cJK@=v~;$V=^|@|q*ivd!$s+DEfND|vf$C1ha; zG?SLIS;Td^6CdbI%5`7zCneX0m+7^S^;;xP`*yvHsV?N;KQ1Tn8A^p9BD^ERNIdn` z1aBVt$H9BtL}f6*c`n^UB>}iNebNzFvBgvmvcp@WI3tT_5C&)WA5ytsn85hu{`u&x);fnqbiEq~EVHQes{AE&b0J+jWRw76D}`j@%OG8=SWY3p%KWyQGS zVvLDppB^#&wi9~%kbJ-Y?nzQQuk3|;7f2!*!y}I%rC0(Dj_ANxvIc0i;2!;5WB+oE z7(>=^C6k65?t_q)G)NioG#@ zRu|oh|H@_E_mMG}K?Vb%mUv`OA4Si@%iStq_LHUX_rEOSfRX{Z2)w*{1X5yeRUY}}=yzwc&x>mxPht>< zHN0$yJG~pS`dyuN`k>_SGLZ*%5?xWG5i%7K#OI?E?u_@zy}IG7&~k#YIc@yYdqTRn@yjBp;w_B#T}cPzTNwv+z| z;U|3Fb(IxwoOs%cG`-486clOfE=-{xIsd}tC0P{^Knm3gY4S1Pq8o`k9fKASXh7t8eSio8Lsjh~htHGZb zOVlIZU}5TqnV!|puzFX&c-a_}y$=9Tgv|ZgBLcbRMH6v8uB<8PSi35y^u#znVrH3o!)uOirb0 zVAUOsRfA{B0vDdz#P~$(PXFu5o5+6>Y-F6u!Ok?V?JoCArzdJDpp3bq?9GOCWK)rE zq-qHy(uDP7jQTZzK_@rsPL>W<+yW_HhfusFR;;Yc2n4zY{M&q8c&X}kgfXM}iYf;= zU{!I69le;tpfBCAf{**iyv^1I89p#tp>}C`!g1Tb6Uh*pJTzd4wlLoCF;YBV2IFLpbr4tH0i(cJnckjK)LS1d7OKiv`VM;WbV!>ICl#2)Ck4g-N~@%g z5YN2hm=hpzHx=WQ)*6_BbxS^d@c^y4w9OR^ogZ(OTecuO#&ndOiN0(GUR34T*6p{{ zr4ywyx<(&fhB*vRSg-H(@aOy2s4hvbk-3~c{y0|SDtspo7m+*{tttu|{DK(4tC_g4 z&gy*iA$#5##m!7>u<*@2BH4``;7mvZEhbXzjucsU$xp+Xtto|*e`nYz+UM4-NSCu( z4+9eC)_A;`YdYB#lV|T?>Q?3c+J0f&e460t>{WoDZCq>t9eX_B{4GRpzb`BLQ77-& z1mKm>s7x9lI#p7)AAz$oKi;dj@_8U2g5l$*BnI-@{qA4r6hveju~&Ylhdhe}%EZ>s zR^{k74?TmN6&Um8iP@v}50^bs@AWI>RZLdK+&U25i5MM?Q>9mbR?W2zT%7d9GuynC)(2{pGRuWk)bCECJ?AIW?s?2$`t(@uX-8G8vW8D|gyS zENx2)vMZD^YUKKlC_VCItPs&T@x9WJ#6o7x9yw40Z9vafkcJEAshH!4tm43z4em?w zq&hI0)aNb;j=bpIBHl9ii?qU%$nW!ZNwdY?AbANRE#9#Qf@!&yy_ z(yg1OMm`k_YJDYgz#}8`-k)^jQsQ2m1HFBxFyMjUlXeJmT4zv~KTkmrhc*@aLByxq z#suV{ytn^FDoQ|^cJL*adRnR6vCeb19eMhEn`igjOYZr$^>G#{CW%(u&|U}E=ok;K zuGm;&;2UWFAfgv@-=FW-+O~ORyKT0A=~_Gx>QDOGUz^hj6jh$k{?*W~8mCt5VZZ-9AwK00Yp7ZF9AF4O zXKh+>%rqvxsc2)5nA15?Ar2iPv0JAaAY7{BIP_Y3u~F$*zfh$U@w#XfH-aKQJ#$}y z(x8MV-1w0z^!^&k7``O^w0-LQ`Mtq8tMI76Rt0Rqx*_+ZkPy#E$e5-*v|MUa{Yb~1mTJNz& z{yC&Kxi=3u;@Nh_{=0At&QFd|JIS#b6aIRt&Sdy1ba^w6bjXl+8ejef+V-K6K}xeo zS)ULE@!5Tp$^}0gbumk6o3J5-8tu-XT*VM_5s~~J%jpr1`x!=4$?2*hj`qvkP*-w1 z6Bqn)D%yQfPc#G6LH7?)z3GP`W_~qZ(C1F6xx_y}l_cWf?NiaJF^%Y1h{&x;P5kf| z0e9JEwO}S@YAzp(^&Z<8)TVkMz#*}{zq*mV*!}IOZ2bdDd(LSwk@)vG>0dip{ya8V zhy48~g4JWSg{z!)wfQ6JCP!yBIi2(dddj;scp1pH3)yyzLu~XPEF6G zcVL;ol&6wza`I1XaK z!w@%8le*5#`ynz9DU00U_&R?WP=SL#9&F~K9Z1{Wkz{Iej;hV%?a!)9qdwDK6|Hj$ zSK5uhtOFjweXllG7N7G-Nm}!pZr`(;H5^9MsNOYEzX1O!8~)Hh7C)M{FZg0;nEdtC zi2(0_I%Ye))*WA#rj@Qr_9Mru3WOP6<0kI)!LtyeFYDrhl}MaV=n$h2DmV_(TXp~= zE3ZM8kQ2f_wRXFez7IBRF?OVjb`(r{d`+M7yVANZ>9O2i&XHhlK@EtOU8O~KbBBf6 zTQ=(bJl(-=DQ*wMK*6zrn>BHIcc*&>dxXynmY{VsEVm&Brja7<6%RVI>*NSS?dU+Mmg zYN*2$x^9WW`=YV z+A;Xr7VM&vDrfqt8<0O9Q)y8`ioNE(X#OyMJZA46iK`B)LM}1hk`MOSbnHd19O+H7 z`BEU6xW3qogsshabVAl3D`Iio9xImi^?58Ff1p4D%HmSzxr@zf37dM*IH3KN@0H`C zOl3O^DXx_}o{ZQ_O1=4PxYv%26{_bPj@I9cI#_S2z#TY#nO*Vj<+@RMtmF3We$Yrw zCT6zGu4DL1xRI11bO zFs 
zFGzljEKIIb4~)v!3q#t-UPv>dOWd2m;iLF6^z>`(`t^K0ix< z`@GVS7^VBcLWFq`X|^v6?@i~7*xv&nCE399HZd-Ac`s5|2KM(1`QcI%*q%>q=)X>M z)%{qs4GFhiljq?!yo~iJ6Z<<#ikIc_o$BI(O^9B1qOt6@P*2|)($ok2_n-EOtlH2I zs(r7r+Xlk$L}-zTsZbs3UAWt`0YR#`(^y283l)^?|DWaMTxm3pEZq0z18W}&KM|RW zc9)7m7s+ZUza!q;XxzP_EvNfb(OOZ?^>7r>zq!9jp=uC8y42Y@kRIEM*N#7z0a4Acc)1Qn z{H*jYq2bL%a1L42WGgW|GAU-{=kua~@It@y=kJ(*+s<*GZ-et<``S(QyOk4X z0$0+vx-B6^O1fH)>*f|AE0+QO+Ge90aR>E9LN(_jh(0sa*LQ`Hf0D<}xwFpZ2Qb)8 zf&v}&-tk`O;)K?2UM0^%LFRxbP?=4Dc@?@fQNO7PX>2>9gKw(Y8Z5*;ZEO<;u=|B5h-+|A%=wVd^j#ju*@ zN-@g-JE!{aX!4b!ibBI3hkisgqx{vDGiSGX+2EiT6nrOQw)1f^j}I1&#oUzstRmC< z-HE*BE0DvZ513=naW8b90yXU>P5MJ zQurLI`R}*c&ShE)9*=S4R2jdPHF;Gz(+c*``j7Rebk8Q1;EmG76=cuVv9>tW;kH;o z7gf7t+dt-Z%74A7HOjgAdTX~6^k*c*$FWqvxkxB=j=1xoaN$7UK+ZZUiTE0w`HlCHeeNe0Sp8x; ziSf5C4ZJlu8&H0d-ib^KDqrgF_(;BV5|B7n(p3qD8z8FK)H|KNjBV=>17@-aDLP#E zfx)q7cswuphX^9|04PBNFaPr&3m$IoC(e&1E;Hc}M9Koj>T|>`hxHDmjPV(H{P~cL zH8G_~C1kmd@4?wQ#*P?g7(0r-EpnA9&gnDj*+~p9r*aoo;Sg~xY<;f&^)TzMrCdHc zX@H5xaL?m42R0TZM|OaJdwpTILxVdz&ICXkXdGy-sqTQ2JoYFB<{Vwua;ma?X-rf7 zZ4dW1t?_Que)2g$DU9!+aZWIj7+u^hWYPMzT>rfBBF7=kv)c83SdYARD9E+=9nI!- zf6aoMg^oq{4mdKeNh!8=ypt2MVSFE~!^LqCK|@M|81OsZ#v2AZeosIu{nfjNuiy}d zCgs@tI^FzlJephWe>8o0G*tipf2EbAk|fKtOO~mGtW&8ZWsLiOiWFBmO|Rb9ee0KX5K=Z1O2kiVKuihdMF?G31)zQMpc zp=C&j9QOV4eZ*u;tGezEH16iA@jahiN0H|dgEcl!d~eM&J#0gIpKLp{LH-0qd}DF8 zW#JuHacitdWR~!BH<3t7Q2|k_rfhfMT4C>vU~SFkitMXG)H1AQr0~JAr4eDaNZOk7 z{=;5({m$uNJ`mb4H*VO|H%ws|KG z&Rxkl;d5$b*2!tr37j;waAsOv-dLAQPSM(`5dZM8IlyBe=~NUUg~cG&5=(a+#J)NR zi7^-0(xmyscWI5-D7(;cp6^fdUjZ-jtAUKhN@&)BoB1tDS?{_vmm+=z!b1qJUK(UQ zvMOW#b3;ZPq|ADP6AM30KuLS4#(ThT1fiHGnYL*K%GRttNAyOA+U?udTYeYVHHG1% zb(M8sU`w4!e-yRi_1^_wC=lb*SUcRo{u@VNxhb_QaAFigG~VeyZsa~JWfX7<*LD&o z708$lgxbE(r_a@2W_*Q*wB2@n@uU|_|TH6pBw9y+V z?DMtvD`uV_-TArypxup&?5_jd;gg5|2%WKwCn)RPwR|2CbHd}rg^)4-g0H`JN`BRT z8&6>B9+hpZgR?{R0`@I@0UTbJb-nex%QPf@CHjp{3Z&7eqj&78`2Se|f*J)40+1ra z-S!=i{3uG0cLaFb+=LVx5x5RwF8Y??W&dQWb|B}p_)W9$D;~*(+>RwVWtFuRwr8e` z-`Ou=hlekk(v+H&kLx<%Z>C&y6y;Cigz&oe0t=pU6^$|R8qu0Rce{l;J%>ZqU9=sh zln&qmE>j@it z2Y7(A{mH|F6!z2p(5+b=QiI2(l#89OUI1fnTLkA^@$02acD=jJ;~pp+-lzNynEuu2lF0#2gKrhvxr}x zKbGx0DmxtFIwZ;t+RG|dm;7L=o;a$U_euyaccKIE%Q@*W^L!#)^F;pFYuErHlksx~ z{g$ntTyjZm{rR;}@-si;=`MEg%azpSlNj8rW`YpO*J6WMmA6J`{D=;XRsGrISOBLR`D@^wtcfIbX0}G@ z*h=+gDq{O0h816R;j?cYRYxL`aB5GoY~u$#48TB3XYf@u&h))q>eWwwdXd+Wxyj$y z)yd4rw`7Liz0@-Xr5*~{A!U6rqbry2F2AeKrk={Z>>@b8Oc&(bD^}cYuBEzD0tg#U zAx=-dUNo!jc>8wcdPeTG{ju}An}$BW+2QhBp?NyW%xxkVk< zZ#W|S=TnhUMc|jreT%5n1z^s7JpQe#*ZxemS#PAt+DcSIfxi!V6dq4;+V4=e=0rDB z|AhEd21K0KhhLCldvkVqXRyjnZFjL}1-s|QJiiqd4@zN^5B4N36Clltp;)x z)vTXgT4dLtTAk$a6~$-9U)x4X>KU*V!iXM6s0@V}I?C zg-4@C`Cyoh$rHrmK6T$cO7(*N(H8m$x|2Coj4$Vz&80s^Q^X z(p$^91jhGhX!cms+yG%M=<4s}A+_B=uHfq%@FE|58zSkzq;TL-4c3hW4QWeq?(=q$ z$i2b$n(1}k_-yH04b~wCPwR)h))6>=x^VEMX=1NlzPi#q8XLPDJ_AlxodAy<^;YExsvqBa!z;FI!9#W>H*IU<@Ge!Lo6v+7n0vU< zKX4?M@$gh%9Lr~Nocb4)=XY(I_-)5KOsRNMg$4{asUE|ZC-Ube;uo@V-7SCY z=}6COA8W=}4)KpS#*0*LJhX}FwrN5n+{uKgdt>P2CxuFlP(on{zLH(~$e6$En)c?` z=w)5jU-u1)fk#i~4}-)VfyRDlA1DiFy+721?}l7yEC=%)$_@7k?fiWI+JNt-hB5Wj zHNIC0US?~<&6#({R;aU@rrWpI&f>$_qpbp-nDByOlxxB=aa2Fgy}`+keuSJMhPD!h z$~%A*{kX1(m>c)NzSP#@$vfYZEA1A%cm>b9;up#?o@^MC#!9u(Yubrt)Ik@s2QWcWaudA@d^v{ysf z9lDG5?E@aatI8+Z!#jWeRwgM3YLxDW^sy_+)(XUvEH395HP!p~cV?+pw=fo&XsAag zvQuv9&S#Z}Vdg8wUl!J;)bo`joDRWo`+jMcCPJSC&TttlP3l&OlM2nyZ*9N%!;s&G zjsf@)R)I6m{3CbUh?ieoKUr>n(rD&->^|jK6hzgBy3^m08^iCq1c!6nE#Pp94%b(1 zC&~E12c2gIT2yDB(3Yk;_6D2ne62veMJX7uc|1`aRsk5fzBnxLqI&Hq@^#DIq*z9Y z*@ZN__M3n82neEPv`o*vBGX~14_&CF|vn>$6c_q zmeXxGFSbO;>&5pkRIiHduVg2gtG;78h+Xw!KO{PS#bKM5pD6-aYHJ9fI_Po{ZpA%^ 
z@DuCRenEI%eo_F^Bb@()6C3nJ%@w5olI0pbqJ?enK_AIXiZS9_x*<`Na5^Vb9Cz{W zAYqN51Ke2v1-E`>m@v{aq!(F40JR$3d+{@OsFYct&dVhF3us%Z(%c8gIvHTMmQX6> zh-VBPA!`yd>5$$~R7Q~jAh~0IHw*M=aN@uZOhso4Q46?WIw9H)yiW-);u%pPg>jT3 za~E<<;|N5q7MUS$k#_m*m1gJe4+Bdn2hxvm#=zGNY=gno-?u)ju%FnSh8AXh*S`%B2wg%TL*^Q=% z77eKsOm#Fv91Fe>YV@|RsVWP+?oI+ zf=Eo2Sh7CrPR#k_7;F`7V|L2yGP!q5PpZ2L+;;>kn~vp&j8LMO`#Dxy+=X-2p4URb z5>oa1ZKTN{{D&Bw$@aH)mXZ?p?(uK8qdfW`g_bbL{+w(yYg3!hdpGy@IE72u*yFiM z5EOW_%br-Gxy?t*QTfIUXHn(;rI7D`{hd)3TkM@FgVN~~c7Lm>)ragg6jPKxZyx+| zpkBtYs!i+8FDYCm@Qk(xUVD5SVFUD7a6sVAJ>&Hp8qzNYvh7InAhIx+vtzHZh*+5PMdB{V4 z-?*6X=;5+Z@0SV{B^t7vyESo8%^?4(p~0hx%O_Mb;gmT3>OVp|y-{ldUuICs4{loc zTd6P7VxYx&^!9-l=@5gMQS%MTB`4le*pLN_AYuG`-}mU3sgx77UkU6hT_PCsaCYdD zaB9J}pht7^1WF4C0~EI?N61=$oNd5Q?fb98gHkL(^?sW!NEGKrclXVeIuoZRrB-CR zcHz!>OKY5S2RU0lk9t3)lly!C3WS%c8f`FeKBQ6~_YzX3#T?FYIFkMHW0b@K{dD^; zmjuY-1SXTye01hUzB77oAEUry+e3*6dA6o?Epl5ey5gkeT<8xHpP7t%pQg7CYu;R` zex^QgE~`Z>Q^wP@;mV=(20rNP`@(uJ5$=GfaNPm?`ui)SpLr5qk1t+sul8Pf>a}mE za!M!1?!Yy+j(5Ezz{xdrRcHL?eJN`p!xuUj%B2KDvNYm6xi)|xvho7gHHTX8SqgwU zfOwaL!jk#wnY%dZYhib^Zx-79RlC6T;ZC&~Sq=XQB`O zQ5=+CrI!P58dll}1DrU(Lj)%mONBMr%$K+@cIe5tw#5DBqY3ja4Fx*lrMVOvoZ`ZW z1BjxJYu-fp@*DNF3 z+VKIc+1bwX=i!j&sJyRG)%dGJR-rv|gg4UKmplg#{3&#PI+^`|q^J;JYhyT8J#+ko zz%dQgvAo{D)1}@?`X@E`v!iikp7Mmlr>Yp?^x=?%rTy4~p`CTR#z0}vV6sFw0xQ8* z(O>S74*ckpw}&b$b)sj9nTcm;7Il^ePRCkFdUJ0RHk8zD@;NgNggT=B$x=Pl}0gZj&BXfh3}%Bu9G&Ili#x85GH);-Tn3! z1O^J+s28p;xOOj2eF#>X!LpM5I|qS0ljy5gZ`)t7Dd<1)+kj@fLC|3%AX|kC*f4hX z^Jr~h2;KPKhU}0JZ^6zrN62aX5#13eoWr*(Hiey>0I?T%QBL#Rg?MGja(XPGdnuL> z&JmWptQf{ATjdF72APPL|5nZkfBI0qnJW^#h7hz*(3UEan=08gGiQWCEZEpg{nkz- zc-Xy3fG&jpbEK0k>}!l%!Pd;XN9%&@$wrONdm^NGW*s=ZAq;KQ@H2Z`c5^J^s5L_G<+s zvDa)0lnG-rVHd3>w<}NxljpQW)sTO zFhcFn$hPjc3s-6^P(<(n;?RM@Mv^pGk0Odd`rGJrDV+OykfAKs2_RljYF;#iNG+x3Zy1Yhq2WYF-QHk7IG}X^OFeH zN)_yMA-2!CusF>Bt`Pkpo@5J_!x4Rf)O8L#yMo1K^${E)7KYli`GCTA=#w9cB&d;@ zweByxa09Jy@AvkFqxY+(YhUm#Tn<-89q<=82G`w-Z{qi|Wsmt_Y^JfySQHSYYk$+# z2Z;^n2o>evZW=Qj9hL_#9(`dnO3bAPM}&B)rep@8rgS;=F{PR8cSk<`!L9C9<0Io$ zOJK(qPMSx-<+=A;R-)}con5VB`%QmvCLNRj?-L%!Ne|DLj|<>tXOEF9Qd0Rf$YJKU z^peMXWN^m;(ejhicT(2=G`)0?y&%acV937u>4$-doRb1zSc%nmTgOdr%FJab>-Yx#s)z_5$JHVIJV=BUy!$HQ4vsW+cw ztT{RRI?HDXNvQf~f~Avm1^#c~2s;>hhwHh{JqYf&BYH@d`x=13i|Hg<4nPm21|t)k zX7%B*fviA`%VHEkksF&g^l9@fz7v&EFRc!d=JLqTNY<(|m`u{^2QlaG&osk(!VGmz zg2ryS0(s(Z{w!XHsz~ZHn3w2`P|NK_jxY7woSP1v?1WzsECI=@td$MmoeJ3kXm$;} zWU<7;5DLO>Zs^DyR9T5%P#ZRDth9DD(Vg}+AH4WbrTb!dRf~ZU@bj5!%)i=#V2!;w z7OT-47rShQ=nYEEPEO7@9754Ye>Ck;VUn!@79eiRAES;`ljS}dkNeD%8qEFvS#mz_ z=HN-(KV3GgH>=_-t!%P=F+k4ZF>2(#t<*aYde)DmZO&P&y|tmO=!D&d?vZILS5D{L zY@Xj(cSY)SW{}fgAAS)SsqyqHk1E$J0@eTbIPjOUj&OJB3os^p8aCmFikE{xO+>gN zSmIR(ZWyc&uwCM|3Y~J|Vnm^=U+=1D81}V)L+%SuCfkckRU=j4+?nF{91h#HZ_L13 zZ7C~Oiqr-nH;DheUlh_DSJ=4rk)&7lej4Q0ta@<`2%Mtg;lPf#Zu>C1-uIohTzV2t zw%KJwTdT835jBG;xjuur)^h2JMbAKhL%FX8g1F9-0X@IlIPS?+1>15nvCuq8-X z*nX+MG-x>&+(j+{-^RLcbB9fN^3B`%W#+%A?myhPsm+yA5`5kIWKXq;%Q3N0Bn&kf zO0_L0#|bwJLlteb&m8g-$a~%NrHW|4ePuUjVCRM($$HpUHtmI~yo@{acr@H$h(naReVj_Ri zPI|2#PZI6lmnk%I_3xw)^a&$Npuu;nGp=~*=66P!h~ccfO8deiA9A?#4Rb=yN6bL7 z#I>Q=eK`j#cM~SiOYc6Fd=O_Jmi47M39U#TfV7Dp$vw@yI_?ul=~oA_EsUdTt=gj| z3MO{9N()zy?+?jJ9WPh4jn#gn*lK-nx+yi+E7~&o=Y*ldmgTFbdwr6_U;8cqlH1Df zz7iNVTgOvBI%#gf_cPPWP0Yx3HsmEHL_I}AYOH(ThqL%2`-1#&0hY5eLMJJnK;1+j z^(?-?2q4WZttg@6syYmY%4qgeN~`n)Vy8)TWx^lr7Y#p5J69xN>)*&ZeD$hO1kMlX^Y z5aDa)$EOxHW*WVa2mjc_KsA_!$kV$9nBqespeT0lp`}_Be~8|aXkBT$nU%zq0dw6mi4p3%n9``h%+*zjSSrgD% z^3#o!kWr3xB#{o3$07Jo#%~qK1uV(!*>m)@;YPZKOZ@IvjLdU>Zbl<7!C$FvuX5lj 
zH*QbAjU^^ZZ3eSF-@#?f%@r*Wv&pmMZIK;ikwK<-V>v-M`^L*s7`+k1@6z(Qi3$LWL+cis2gLSb9y+cU%Z5{kgE-(=lyJ$2<}#jJ+}#Og1_u!yUS+uvbIfiPOs)qAvg|d}`epCBl8}DC>T< zeA@)EN~C?rIsVb2wAEe98fSsc#9CS<(m!mVx#hcoFgF3v4@uup4h3o~Op;mFNP5RrEtD z%ao(wMIHN4h1Co%xUrd{5mv^XJH97D;B!AN#QxNYnH2QP!k8Ly`x zMd~M@G!Ek!$*$h?zexwY8VbI`fJwbA+Y;=TW@nyUhhho#ah@IIhL%FSqPA7=WtDpp za4Bt8s6jTm+G2uOGf6}|Tp$R0mw?GfE!_O*7=@dKaBY$tp&M&qr`PBmJfiR^?~)sL zLgiYxvi;O$3ZCn;ZeZRhvL335juVVXvi|A2`|rbM-iz5v|Drt48QwJ!rTw0?3CdKR z&*OhpYtz_jxObCwCc*degpJWgSI15~JCm$-ufQT*a-q-|Ek|dZ57ZdzrVXX^enLVg z`#zxp9oMpE%s*F|ObYik5W49v02-BX=gZH@7d8yH?;v%Yxau^SJ0FR&0i~hxM?`(7 z+!~iX$)G=9rJT$JwT1Sz`#ZU74G%)Fw*tLz2a$aLn^^2RIY>hg&a?i8!n-@{P-%c5;LDP464h)VSP5Mo3wiZTXr+yxkGt{oKsy%Qz!08xn zm?jt1`%~_uX}3=jVk<%Lp8GtZ!M0xZ@=ey`FT?dt6P5?HHdN3oJOJqX#yMv8&RMiM{KauhZE76s~a_usCJ_=>yKO!$$(!F6g$ zijpXqfM(`(y^nc}eT4M`U?vsp`l z)Ts|smQPxnLaGdsXD#o~vQpdPoFXIgW^2O-0UfR9@1D}BS{UY^$|UxUb80kU9r#=w zWcCN!^d6Ty^j+|DwIO&#I@aGYW?laE|Deg7Li~l6zg!ga&JXi`&LFmjLS9~FS1-qb z>YOM0Mc5{~4CnmU*$PX)zBr5|p+SnDpW6GOtIyOhxxyMIl!!}bl3nt7fA0Imc%GRZ zxWciPZqa|WNb?dbgZBWL2gje4(>CRj4p3d*>fB_^<}r}i;dh|C#_vRIeT?Je%FwGB z{Yiv&w$T;fS#0M}S;`$zP!LQlc;5|wh0taRk7whCq7ovplYaw6umuUP*Mg2dB>Gcj z|NO{Xm%^&Py}JBV6cWNK>oB&T9QpKpitX?&8$IcinVhuGqIf9^@?)0<`L#3>^T3V}8N$#Lh)v1T2qmNKx6$lo>$Ry`%phiqFo9r>tf6c$2OG$*GeP5h z&Vc1%GTd~Ga5ZRUwhji$G-2{@eOlSe`Ch=qZwBKX#h~0Cy$XzRr(y}xiQ-Jv&Xr~W zzUp!?gn)xrlMQ{dM=P(%U||GCQcsN%?#H{6R(!%WRv(!{#BMroC)s9@&Z{5%Pke#C zu#OW09IwLB5_=_f{@vQfXLsuGNA(+5?2ErOup<2QI(aS4KjvS~c={3h63<1R&rV+o zwI?31bNzY@WOVg6l{9LfNJ3^4r-SdHk*@jE_uH^W@y&chs_3qx1YH3+8S8$o5fh0$ z6(eXJ){AaDAlu*kHHFoJUSg=Q_ZT;dSx;)YEVk=h8QWibh&{IBh;qpFabU8P*Wd#_ z#&^&(*TVAIHBd3)y!juMN8t_(u{DYzwm;YxYn_uX5F;VQLtr>n>koQ5-!NX2y4N;z z(kOd1LNn}m_UGF^8Q0)NP)Yf$N+>uN z)4ubE-wUeRkPQ09$75l#`wb>;0_IaD6Y1xnoz4 zns~kxO(@Oo3D-G@owqex$ov9>FJq3(obonjEr3RZf8j8YVPs#4V>O4QPqIP5f#twVpJ(&bk71 zwKu^v;W%62yBfhNV!CsslOsNPh6|r-WVQ7OdC9Y*<)@4mU{p!ld#f!^{F=b@#e(WSt<#1V9%s$(|9i6J>kExW8fo*g$aJ;qj2Y(Op!0z5;eZMXeI;-Yto{`Q0<-`k+EJ;vBXvmu&MvBg{KLrRGn7 zj&9O39p%OOW`)^u%`$}wEXKFIf%8a>8THhWZzjm+LaR#T6GqlFVzvZi}D^Ub)|Nr%B=lRey;W1 zf9O{MJ`Al;FLQ-8g_0!=0H#>u>vNI*wwIcYkh2+!%}$#QXYuji%j((^QT zq2d;P>nwGw`wYK}?ompA5`oU5R;uBZGdtha2c=6EB)uth7`85?X_M)t%S|^QA3#pX z%Pl@`c-&;a1INlSDiR{m6ZlAy-OM#rd3*nN-egAE*M*XH2J+`SVi+GG4}IaP|D<96{Zrh6^+MUT5%Gt@tMIbt zngu%Tb5KncD3%0!k`^mx;aaU&&e0zM+A!VM1XhMgYlk6NeyhGcjp6PlbQ3? 
zf78Sv3=6p>$2IeZGgI2xXjjrW%v0W43#8hp;zJ8Uwh2opRSsMRRCBC@3eMs;*RsB2 zW(pEH>GZOYZFvqsf&AXJ$Uz4jZY5^9C_`j^I@O)DIVfH;%`R9Q{tu2`{*Q{1^}UPD z8{UAM6fPRfsmV@jIdfjtMwxoEiP%hyPMc6^#L!C6rq+x%#VTCM5g zG6Jb{fV~z`6$@4`Z&|R<*FOtwEQ@FKpHtpk^CO!r*{C}oeXh{t^x2fwo}f2#8sqvn zQf{A7|Jn!~f0*$y(RCLkbZ=?Kzv)D(#0X;{FMFIW{7pqeyRj+BtFmcq@6W^>>wt z&{c&tH`(**_vP;INMg=_Dc)L5vzC6dpEa_Yg(t7qFvy!p?rPvdfhUKD2eKdu_ywnE z5vzD)i^cmzV@c17oL*&aC2u5>o{b~MFToW zyGyuQPltSea*2~?@ysHzTseJsP*XYgCs!zXxJ~l@q3zRuO)UB!=j^}zBsbCY$E(9T zCnZ$084Mx~X~DoT2ppTeUmSBZ%ahoYl4wF3eiibIJxGho*2j-IHnko$;6_D6X~Bqw zLRHdgLURF=6jsm*xkjDE2c=D+&6SqD8djFnAQGz@;W=H8yrzR0J(vcnC$gv}7Y8`)wxG6J zMbt6qQX(|AzCc&`Xe0aL@w$p{@H6_>n==-4Uv#QhaUw|nbuakG*CK&=?;E4u-TTaY zqcnr78Z$%UCl?$|v68z!3S-e9%yn8XcVs=QpJ{l~Z(Ee`k3p?Ro(Sr#^Owf$)ljtu z-swtp+6`y#?YVLE^JC*bVFVX2_lOc#wMqfc>&6Tr%gSYj17#c4frN=1iZW1w>&Ga{ zLk54?XTNB{_HKVuNV%}UC=pftbGr5rv!M}%Y|0cel~4l7&Ey4r_D1=Td$-LDK>`&T zPjCaDUu6q}jX*eiiRmi1edYEo=V&+Nz_*A2u39F` zNuzdm@)a_%9kPd%2phmZ&L;aCi{&CgZrNULCRn=Tl{*ErfQn-yTpN#t|6S9BNo7-1 z0V~|)z{pEHiU#+{9yq*LJ~2+YnX^8}g@QZDT^Dqv;#r%#)Jkc50lmRT`l^F#f=Be8 z$NAl#$A%8K@4NK7iuPzmY+=eQs(spwGpiAKob-k>kbnD6#kk9tQT-!n!ap4)Y=7-L zJ?gWfonLb6+yRAZ`tgsnD-3mBtV)i)m+>wH*4FD&%)n3Y*DN=;Q)-B@NeWKMKrlyp zJn3A>@F%L)_v=}I7$M|?0NN#dZ&-d#%Q7d-l6NkLf#dC}L_m*3-@VBs}CbA<8^2JS>vIj;c5sG1ZW$cbW+xs%E=R?2hNkAN1V$~N3 zUXyxhaGhqTI-#dBQqIl>7#+0DGuW!MPtiushRD8Z4~_qB!pIdof6SBw&V7p5M&TCd zCR)8b`E61Mwv^3K2h~<xe&m9t8Qg{M7E_NF8=jI&tzL3QJpM7 zm(*21*lYSS;gb=Xb0gbzIQ=kKbWR$%IdbRAg;)2lN52D)UhJ0y5}MqxkMHWP-I2m> z4tZ1PR`aj_?G`O-}Tvx%H_mv-m;%UO$Jz10c$`9Sh^&ez! z&Oba)FA9PziQA7(J^;LUo|$p>hBk+4XhIgaTFh{azvy;jdbjbG;~FZ2D583!ru)?- z%I#M>`N$IJA_#OryHPN<8lXXE1VBpLgFyOQWk?3#1B&LzO;kE{$%_xn8gN#pZQ9NR zEWhE>r%=`G0h*!Ci4+(!|9N)C5F|7>rH|O+t2!q)N72~XLPNE8H}UcG0&z$w|6qnP z$?-HfkKQbfEgXFbd^@j?onxLqgl5GC$ZWf1<$qh19CmQ6_KMe%t~Xj1BZHz z^hsKXQBDajvjp-f7^^B1vL6A1xiVX*A%hEBFQU7Tl8Q7&M@Kk`buU#1DB3%eC;~a0 z!U96nn`z<*OE~6pQU=wWPCjI0oESLB{U^&Dam2MWqXFME-5%1Rrl2O(5TnPDA0oe$ z3)gz(9F`3JLS20zom3enk(x&s!+draH~_Pi&vx6reax< zcb%uxtz2qez7q!G$f5s!+#Xfa?mvSxx&DR-TP65-@N9Qn13Kqc8{Wy${F!MSKW71z z)W^uH9oIpK=CXTLt$Az6?;7y9sQsRPh*B|Wsl^uqnG*4*= zGV)2geyLXk5yQR_9*mM%-6y{KPdOb-xMB@wkKR05myBi~k_Q@zFk3|(MpzCBz%qrj*ywt38MXRW?jFH^uG+u1BC;IN&CINy|*DkFPzXuYMt#*_v&6| z?!0So>3xg6+d%R?`m>c*QK@GXe~psPg=2j>$@?t^%Xfnc&lK5X{8F=W`0QSE{h)0< zbya~t@ix{{tS|*1t+^oDkAq1S!s5)VN9;FWj%b~!<3|CsAr9GxuoL=-Jab}a+!_)~ zviDbb!9?O7`BuDPXz+=aKV>b_f|a@n1I-Ki_Fz4Uhmc3{M(Qr=!(nfJsD+|-45ZvG zIZ-?S-G{cdaIaSJghgiH?ts)hc&vu6nn9lKQ7txU@?jOKo;` zt(W$a+gQ!V|LtJg&hU`xreDiBPU-86?W}Q?L*IfeWp2wGQ;|x)Rn8>6@+gvHhhCAi z06ohLt<2D;)4hyCgbC;}*&|dho1}`b_E*C0?{`cwo+!%^olfjywmtmDcM~VtPskri z`#x7xo2h$A4(Kgy6H_(!x{{(JDeB3m7${cl7D`mR)y2q6uyGeEeLvKghPE`x&_3U+ zCa2b2y?>sE^s+9j=YYnILmBxo4dXkB>r%jeSr^!#AAU+l=NxI5vgVW!e6Mb(8i-!o z_S;McCqJ6)NlHve60R_D5zrz^o?o)6B)ZThD$_yvo^b?-4DpqomR(gP0!Y0ejLQ@BlgO)D=+QcwoL!_T+7aw3M{wp~IaL0XVqTyr!| zN{=X{VXcF8V0d-TWGHppphyO6;gPa~_k0?B5bBoKig25NOFFvQiMz-M*VtljRIXBH zsca`!y#;Uxe17OF$Kk(^oL@>u*~2UtysA2w{p;mHgrr%-mLk;MajyO=l(<@@OG-k( z;#eGgOM^Wm{oDGkx!dYUIgm;p^I+Of#XlNeJxsYy+cen5aJo8H?!!=EIt4B-q>?&T zwm8OQkp)T;F|@z;zc$vKXt|v`(!ttF{DsHUy-;h}F{9)McifwS_&cjtXMg=0RfL=e zAINH$g-ku@Sik7zb?S|A~Nwu^7zQ4>%Zs`0J-#2bE2>Cc)z^FibG4p9y&8jgF=2pUIP^Le8#9J|G!Ta0&t>WhT3UXIbv z1UxEc<%hpcU)DRqOO2svx!ckGwxdBNqJrki5umIJ7n{!xv27|&o!Px7ckOurhfdO% z&Tx30NT^5Rt|l<#mTxAl9e`?kRR+G-@Vxf8f3vKBr+6(8WBs_G2bya_?sqjVH=PT_ zTw7v={QU;ShaFE7_HKu%CPJ3RLl@-o|{^T0(h-sw#yqdQ7CX_xA`9 z3F}X6{!IgG%LxSa0!3%jf3yQ;qL+uW1t9gfRByz+w?&_JI2j&{_@Zw%tX$H$+o9Ur zA?K^1`EH}h$X0b@S?moJ()VkJTk@rxWC1{Ehm|g*ueT>F)$N&;Q^Y(_6-*W000y>; 
zj%chCeb36(?t#CmGItmBuEq8;qmt0YhU-kP%h7=p(-nF z=L@8RjU%?;s4}WrdU8-l-uUg?kHcAmdyL!)-^(TsO^9Pj*L)VP^f5ilH{JBp@;IC` zHsdEGwlb@#h1)c=-mh($%SD^I_b1KSFl>yN6j_)cnrN78bVnA{V3W`4j{hkCPl5SM{OJJ zxLsB-Tbr46-2kb=M-u88G$05(GKO07R|=1j%$rD2UiVlESWnwc=0O;PcS~H>L6G|z zzlQx_Be}lN3I9V!vDuR-;Yq%@^=W-PGL3t1TNo;Ak>6((@dSL4a^{ByaCZNXBVVI< z9!L`d2*<%(N-lx!R4?ho_Ij{*%xqoAF^bLmP2#@N;ilY?1Eo#z_Wexh)JbvR3NRJc+Y zbCxX5T8SXcu&84Y|j5F}Re3d8uvCF8}v^UCPM&^!Cu>j%Eip zZ}-(deZ$B04d=wiZQxJMag^eYe%c{`Po3?pZBy z4GHKBD}_Bi7feWa;YZwQN`rzRIg;l6&7&_D0Ckwmwqb@q%77!)v?d%MqR~iLMa=Dt zcVG+LIUeI=!t=KiMPXt%bY<{X37h5tpHP|Ro`^^p+5=0~2)~>-z zGJ(#atakO~?O=V$gx7H^s+ns{8A_m<2d%RRQ0XRWBRJsOWB~=4zX8NYX38&&2tr64 zcF~q*mrE?bL7N0ZPtu^({O!kx?}5VDWZTdaSr^;C=E&-HmAf`o=r&=$@R9=)q=HQO zR$Y3>c&I?03TFJ&9FH@Uk71}*r7PLs^*ABy`?)>Hipq#n2HHc&!}t)180PTarg zV$pJV_~T<;uk5Q!{~|`-^L$PPE*)j3=$&QnJYL>;C`zS62z=IVr{MF`rtHb_tQGI= zK7FM>l4iN3P3X*xr-l|uo`aJJus9$I$`Zgm{wT}-f;ikRiM`iGT|Sorj>)&_tRq#_ z?tiMYYr~_CuI}0A-}&zawd#>?GJ2x-#6;C>qoq?fr(li#YN9fO2q&x;?zkSQ5f|uF zVE8WL@;o5xLPqQ&bI_d3#Y9Rac=@}wa*0_~(-mVrP(|n7Az%VhD856OU+)~-p2t|- z*t4xw_2w}xaM9Xno%vzu%uX-TkBaqw|B`-P_`Gfi^pl$n|M0;G0=9C3I!?g{5gL#Y z8um=njnFtt`WM6n2nl@@5+%LD+O{mi{9FEiO2s6 z6Qv2Im6GGLrxEqTv`kOk4|6I{n-4c1I{RE=q~E5=5IgeEiy~^V*;7`vEj%B+$JKku ze!mB=A$Q1K(WVs#43jFkNN&O^0{;I28e=_&g4k%AZZ>fq1JqLK1)wSJ@19V>ZQ+NO zNjk&A8Ro=VMAyzo#4ds%Lf{a%wNc$dc)4AP^CDz!j5foA3Km*Xlu0Gr;$i}XV47z9 z4#M>l7KqnA)od}P7t`SR6&2xZ+13D!ws&19ul}=mVi9MvKYU8{gBPgQisBNeZuDlI zq0JOPxp`2Ilt{>>#ZaYj&089bu4dGUM-qQu!)kZ$sd{=dAbLLP^Z_uIAVdSW^Twgn zj-S@CFC{`7mkaS>2fgT+m=;Pemt}+s*S`7m^I1t;JrvK9rH8xVuY2JOjUk|c3de?3 zEnCOblOMBS0olwYWJxh=EP?KFsbj0S>pJOUJDwz6n{z;FO_bsy(sNj{Zp5?Wo~sfj z;U<5?HN!(mRpBF#hN^|IazFMv*rX!(?0ocfc$do&TxW^t(-HG8?n$|4<#H;AXi46( zP@Vgf+1nQLEI)L|!M!nc;c|22lAKIqA`Q*?)2vb*>eme;yB*Z+kA#)Y)sOYjV3wgt zN2d+YgE$6cK*xGto|v4&dysezX5$je2~D*&GwO= z8owO!pRHJjUvq{ld9qDnAnayvttj-`cEwj(0b>!k|8?(*2?X*61uTFjG3*z0&#t>{ zbj?tXUlwjKTpgJMuF}isZXmAL6hYYBV)HibyEhemxkW(E&TQn-=R%v#D->kOnL6(& zx-D2uC)wu*c3kctFNf41o+-s9Cdv#Owe*_)^dV%UP2cJz1`qze6G^R0`{erf8p?vD zC29vI!Z5tN9-UB6rF9%v*)a1-WLP`cHmNe{Bnvy2g@JKGFszft!H^yt0OfXwcX#wn z9IQi((MU&CS4sqehd#Ku+7t)$I_FXLa0LJwp%52B?l>`W6>?mU<2b?b4o0g=3m8r1 zbz+27Dy(t0rt0Iqi#@E>&#xsILgtWKKfMn%Hzi7@P4w7=Q8%lg`V}@uph34h(~$zf zx5}mo2^MLjLyJUtkU=qpO(BE^_{qxTEE$JTH`TxiLygrP-B$lL_??`TDb;PQQ-e8J zP}Tnd{<6dK4{Z|a5QWwL)CA>at63>oZ{4CCx>PNA6w2!|OImEUw?#0c8q#il=99tRvq@Czq$2zkSs)6pW^8>5)T!6z{ z5zY^P@sY0GM~ujDn+XlVvl{r*|1?LV#n8bXUkSA(7o-jtb13Xa%mwFCJ!ZfzB#r2B zF_XP3f4vY=peCHi9l{)M45v<5gZX(Bz zxHs7l<-X2pUvteh&-2cG&biNhZbS#FiWtj&uVK-Ut8V{# zNDs!73yIwJ82t+0x4dtEhjDb76g~A<{EWlF11apom%sYoHF*AwxbU0AAWvU_{DEiq z1x9*3AW4(`&i}?5Tu02YLqA0r_=U0Co$iB{VlZ#&vrb~r?7_T+Mli$=#YNhH>zjGn zp*^7&k@cWi1_lq-c1b}ItGl=oi_<$oHqS;AlFERz1*h5;6SQalX7 zO-=|*VaI(F-AIQSj$?N~bRUl4F)S@rcFw4yTSX~i}H@g zSD8hb~l5%fP%37B7zZHO9EeJcQvOq-^7jTzXfG0*Wn&yjZBXi zV&T==mIEpHv6>eAl3~_@>LHKmVnGj|rFRws+_k`rd9@;jpNCY+WFJ0cXuaG`XNTMt z5Vzm_ZaiqBd^LQM!}I--%aMqpT4I}*gefR-bRt`(d0;dn^Y8dgmi4DBs>ru8#noT! 
z_GgyZ=_bnm=Ji_xSKL>tVqgmg54mZ3%kp6UO4^mH2K^bXw(58L&W22#Yvq`55k@M7 zct4&;f!dg!$sTB?t>`9Xg^52h)fc$I2)$^9fr=*dbM_>gA^d9G3AYg-Mt%(wkK5jD zeyzC%I~D`)L^bp;H~UdgBwo9K)h3q^B}V^`A&I_>|IZvZmz(hO zu8bJ#100;^{4fY#Vfao)n)r55`OC4he;<-2g5k8yMBF6% z%r4?yQxpR9Lm=8P8NX#VnGosJaW5OFbJwpM^W%`{*1_5OA-6E4JyhEd4!%6E9G_s| z81d!BGo6QP@Aa?cDROfJ_>M7+hNs^)wU92dSwH{hT=SeXXK;DAsYASs%BVQ| z5kH{d#JldC)b&K9vzxm#G$|j*8P<7Yuo_!yHWQU6@*H*)vE9Jw>r?>ttdfK=olK_} zCMWLEL(f?9-pZ!p5gfcB7E6Aw;z9GM?L@5G{h|~W(#|j6rO2(OmxTtovr)P>Xyf zLuMlf;JWAUc8F)LxDDzpAw-up|LCXM_QGPK+HZ$RO$t2>$1;D=aycbb{S2LgwMU=P z-FyD!nrnaF2%0`5<>uY`F&Uf}SJ>lG#;w_u4hnYeu224Ib>yvW3IB^}JyUB;w> zPaDa(V>54k7LNH=OOM8SH?kFlWNoUjM$imzIFCNC^00US+}t_ipWOdI^R zOo`BDO zVVd^k!SPu>Dla_)2;;K91$Cr^8wy*@0($4X-wqZX%9x5LkqiS~5<3kYi9=m}7BXiA zP9n&W2TOy87w`hKYbHm9jXzb~O=#PEk2us{v)jJu*IIY|$jnZ%gQ4bv?xz=Y9j%#4 zto6*5&iW&ZgEN8jF@pSIHUe@m4_@C;IfjV*C8JfLo{GFTkF>kT;=5j%jX;ox#@B+< z(R-TMG1dJd_EYE`<>0fitQ-)+2qNm1#5xTCXZohg zegabQ1~D1(7R2!0)Z(8SzI^zd@Q~S1`^n?88iBj7IbuIZd@gWP_?MJ(^sY$B;huS< z=l5~Bl$=B4@;Pyv=bhtG;w67-1x5CO=3;R|eOwZg=0u`Vu7Rd4O*x5t{oCy4Pxc+- z+KYAXEKjA;n@@7z)nTl@{i-YD9}_0TRJTx48dF5%-)2$za|!{T!2QC{vsh;4n~`&) zFaDg{`_TDkmwJWt6$kDC#BcHna;X$-3pAMJoH;PH( zuj7y-Fshb~=c5I@TyeY&Q_nu0z{t+Epj2Sg@&rRahA2@Zot_8xZ4K`dkQ(rCi*2b# z3>_Uw8zkx{{4ns*;$)|!3&W{~a6Y@p6+VoX#?@)IOJijH^7r?_^OR9sri=4OjER92 zZ7mQe|G`6#(fDQh*5J*+JT4abGDDU=b=A6=mq6sf2*MOzom1q|lis52>_Cgcv%m+W z*kJ&@G(jaFsnsfr**`p{7!z0D^kk<^7UQ|2JUNhtNTn2vBex4G=r34}RDN6;!)+>_ z)Bbn^u#0au8XG;VcnTLk#%a)(eRo@<=2GkXFjJQtmpJ_oe}j~2x$($IZ}BD%h3m&R zVV&mref!NaQo=5;>X+w|oqbDs%4bzIR>HxPxDmUa@_4)Ya+1G0Ta;|&}wrd$HeQK5xSTZ^sYtiMfC2O1w@If1rdMf2rxdZ1ETHh zwAW>JF(#uKUkbm-SdJv%Ckw#{C}nu-Kp-t;Z{~OdeaRyjG4ko`(JphDK1*tlt7hoq zAZ>2fYKuAIQPno)$j6c{39kl&_WM2>>#lPbADU$UG&52XXkMAT;As|m^ta9V`W6Qu z^t_Obid3+M{oI8nM0WIP3I9pfuA>da2i8umuTH*?kiEvHnCGhTm)%283a5Vcw!Wgx z{i7xk=6rWYf3XKXH~Z9si(+KX-QYro+&)DJd!*I)_5(>Xo$c^9x?*RGheLgP`0~=u zgf?K|C40xpXO#3g&ecYR58;b+*9tV?Z_!m(a{QgZ83seo!4h9{@GE#ye)vRl2qVM| z@-y5%294(>Fa^)*o~hPYf&J3iV}*Gc6pJ2aZAMvnSBS z=pedTek$_Y8uL#ZUnlogNzugP=x>ILd5-m-B6_S`!;h+T={d+M)Ar3xpVu%-7=Fj+ z0%d-p`29rqq|_`1<-fT{q%{{jXxM#j4D;Hy_-FuD1LvqwXtUX;ksYfRBa+?q?M-H% z>adJU3RwtOB&L45wgy&=11P`VQpgIg- zQNeu{jR#@aBxt?F6?i;MLM9wllf5ahvm%Rexqrh3DgLObso7BudhCc8!&%H8TG+^! 
zdem@zl4tU{XB9W=?qa8*pHO{C^`J`EXu!eHjPz()O{En zfi3^`d2RE|tMB|U-WL}txPcy%h|o*h@?MWHVnB_4u?(qZ!Fc^D!n=&r{c>8LUcXeu z0_*pGBQ_T~p8;zhT=80^SLd4ox~;xBzwm@)PU%78#R4J=n)TtM95(B`Dh2)WZaWg; znseW7gBxOQ<)PrITw$_dvRXR$W9`rPQcz{m(VX6v5T4l$LtCn&*8B=UeVh-)Z@%qA zhXnWl6kZF(1lG3_m&!#?S(YP}jVN_!<)wlN65eNJH+g3&@%$u_Hb;EY^j6ft7bwrU zA2pD%f80p*@jV}^Mdh&X)LV8_K01ptCj;ap@IcB0`HN;A(IxXZCopTku$JZ?i?bnH z8M&on+bdL1eTHc_pN4*BK6peA-u`O`(T9sM<4lYwt=#gAd3H#U5hiRQJ@gwQ-xBfN zC`Z%h)Xf!M&c-a@o*M78Rig7~ZZtXYz>z|96Y_B2>;7z6>cRT^U7ByNn*Yo2gP7h2 zCL+5JTUuUw?p=yN_lfA)jgn0S>v!rriZ4X8V}%F~C!0Chrj8p&kLrg)sJY=n?=YA< zE1W&$m%}$J+6wGR;O7@~FBJIx{b2p4Lb)unb--FB3DIo>B*_RGBHrQWNCa24_iS)2 zu1raj1QbdRF7U@KHNuN;965PvNaL!eB7WtL{StDUzj9=#=00;w)U!y`l>e3HPn=B^ zN1o?LVeCFX5j4Z;?0gu^#4!E2f}`HzoIgp+337PXv(JgHCP)SAnViX~JX+dy61sKz zgjcNV10mH=ravd7&+sFb%z1s#fz%UHT>9oM2m1z7N4{3CEMMo5xbVB}cG_#Au)u!N zHS)8&hfdrQYK}~vm4l$=zv>+V_;>!n zStw=u-y!=?uY(fU&y|QS+qy*zj#O$POp>UXp5={xT$CcHLpt=U6X~pNwme{A~24;L#f*6IVZwtvIxv*7mAbh}$_clCPN zmHTX~&{#{c>Yrj$bbYPJC4`C80%C0`Dr{~zLJ8&;h3Fn&89$`+hooG1(VXxk5l@}u z=vJKgkyp(&ZhSU%yz#LK?~>4dg ztxdHfmJ+e!Z_UtEs-Q`34>C~iUVKD?TcPCU=g%UDU`W43w^b4mH&k&3^V?7ip}hxF zEm9@3BPvdxerSLHBHH&jWIBo&R|wtGHGYK9MyE`)B?L~BhRkaWl`WsIWjW6`J$WtV z13Tj|ic0H-@5R4kUXN|B9k}qNeMIbO-HMfMvsj%OUa=M3yb_}FbfbRK^+$PiGtMfO zcGuQA{%^6n#O-awPE@pM^Z4USgO3pwIZ!k+IhPOut6x#YEF^vtLq|Ym3)3WIYd;C; z*vW&@4m}px)g^(BPIn7l{E{&6CdDnd576d?If2eMDlsWuy-qouH#xKy6>yG&-a~S5 zN!M6hl%A3}iQ-0epU^8TAD(V4WRo$FYE zOR7=3L{8(=Ku?{o+@JRq#Sf}G689tdk_D+q1c-J7zpO>Iw*YoWzl;L=dBH*=4KuUo`d} z^JnZU*hbA~K-3uAShPMIASj5b} zb9RF`WcS^`_cl`le8}*F$%LgkzDJ)3U2gEb!|*HdAz)h9S&G?pKqrkf5QdG1vau5w z!ixW4`Eods_6hxSXVDIJ_#Qr}0QzBqZ8qvA2d=;`P%F%YI^h5Ds2MISdj+huwly|v zjt8TG?%q=BN}b112{s4}$x@{gscJ4-4OmoQzf#O%pbg3>915SC$6eET*$KsC=W%Tq z0@9gI++*~xSVh3tJroXJyiqQ=k=u_u1dy%vcCZ@2mj?EnJd1Jk0<8OQGkz&#`4Ro!8#Jq#Z;tmz%_3??4;sB>qy1@~N*o4YgTd zUISKaCyniE04ChRS=(yPT_^A2sGjVrU-zH0k8KN|uM!jX+YU-j>l|&A#I*R`{8uV& zHXzw_`@L1pMUQMLP{nM2dEv@QU!B);sV;HTfDr@NRA|?Nzbub5=b~H({yVQrcgunM zu9(ng-){tQLUFL&63mB)$|hELDwu6?D0IT(R}^wnxBD#Y^wgazZZ_La-pgXZ+%Z$Y z$Vlx;aF%hJT>WgN;rgHCduTtVr{AE{UsG+5oQ5lwB1yk7B)inw8%Qp#_A`im6KzSl z^W_;B-)9+pT+jW3tmBBGa!-Cl5|hEpZo{3N-23d8fVcknK)Kn6f~EF$67rg3<^Rq| zgXbKF@6{#|I`zW^LE8kfg}Ms5zf-phLI+>4VEyn}-?gjpG-}9d78t2VUd{4sf zfuFJD@g1h~3rE5`WLMrbPMrG48sPmJmGt5#c<`kQJ<8A6n~nM-ET2Vlr4KW##F3TC ziv@^mWDUh|!4QF~nSOm9+Aaw*%YvOka!KSGzf$w)!*+pItnC7w;Ey-&xAwGmWEzIj z5U!RuMnV0$ehF!4raI_UCUkt_{QM75jtlJWzCJvC+#7+n(^@CK@Aih8#lXvo@~S#x z=Qv&Vytw|Uo~VK$d8UC+7QtoAbX)XEkC6TLZQb|w=$xeS%FK7jcfgu8oeP};3$}>H zV3iT3Qn+_=1f~Zui!3L$8lqLYKkc<93IJq!&ib1>W)3;^b zM&PO$OmCksTXJvD#}o1X8dHGo2+9@fgH{8}N}j%ACHUBF6(|T$D?%>PEr3z<%Y{@? 
zHTsNI-SZRFYF`_0cOOMbbTFa!_G1)p2DizefqMyPe{M_!qy~2~Z^JAx!x?Xb1w@)& z+$Phf26!+TQk|^Yt2`Lzs49w$*N`l|$4*5Q{nnm;$1vYBftZc8y#Ot7?uaLL_8jIg zLfC=nOmt_$@f-ijA$cls1r^V3`Dz7KhxUT?F649Ecp zgmcIV_YV5_zFY3a*wTj0h=+gqtYZ@xaZDZ|^_|#qVm(mRS%(=K?7{J&E!K`P)V!_k zUtifTumBf7XnC64%58ED@783gCnKVlrj<<%BuWEJRggmlSEX;NR#th0kh*-j-UE300HvCLgZUFH_4J_~Q55qu1rOd-8T7wIG4jsq zV(u3#w5!kOy|uM%O&?<*BW65S({EHj%wd-`MfcNnv}kb7v-!xwi!P3-Z#omc9UZeW zbI6oO>T=LiJ|3gbCSSUpcfTlBEtfr#i#}w3dwA(8!tL=%{PS?wO5wl<8Rcr%3uns& z(Z3f{%=@!6R9<{r#;uQUR;w-RH=vCi+g7^ctLV&`Q)vHpA;sd@CCveWIIOSc(-CxQ zI*hCI?nvviKy=@Ywc3FR#FAtatNz(ZI7<^l&sd|^vb}eTn8O1c@}=C<^I0ytHZ&Gr zWyj>ZhH6x;=)T946a2+o`A0cO$-07}*zK!; zegG;bDHjXobkw!;hDIfK0{nM6fef;CTOSXmfgBQxONVYiC<@5~uo6J6xO={%4b9`%Wkf|{o z632Sp(s=^@{Q6SWrA6E&vwJMVY)A=_RPH&^V0=BZp5y)Ha~x^!M_OJ8t#6W2W_w|q za|Q7yWt=lhI2BcF(00T`+$&4#QLlS}%Vq~j*lTNk^M>0tU-Z$TM1vu@q#<6tXY`tk zhs0mZ&{iF*SrR0OkT!mo4%Xuru@R;-Ox!705wsXie4+#QSZ1HF{6wLZ*o^NmG#R*q zc8G6Icw^UtCx_ksar2WItrIOuk&>V;>87hfwaK9})e!^v&O|_VYY?}Hql`KRi zR%}X3`>zf*P!PxMa80Dfh$dNjH1_~cSsnv&hPbhCUY@1RA)7L-#y(yCO{|uRLlznd z4XCd6?T8M~yPBtcQB9U_uv4QZp8P0iSm5s>Dz=Gpwa_-i!aXiUOWy6!nclh3lce@@ zX?ULMhWI1ZBNp4beEMH{T0Y0P7-?b>% z6+b~_S3tKq0A3B%jJ0GeOY`C1dK_(L;I~8{k9>ATWER^vhhtnXXp=tOTS7Q$p`$Xp zbWHUudvb*53%dN2xew2x2M{5dkN*^(eX!&7vNeHX!LrbM^?TA=NTC-+6;DWB`_okJ`&+%~-<;do^2%@+L7nm62dQ5ra}2~-QvB%_Mot}nkidWn|o z=xa)6RE0+Li{nf|OYBU`u*;+3z=H0nKcti^g^LZxRFzJOc)M|Wku$Hsa@_W=@ubY3 zwb?1xNv91e*QhFVj{p`JP9S=}SAx0==jKl-SJH0m=kZBozjzW1Ba~aFFSCorCnkQE z??lFSJ#&y_4S8|o*^Z&kZOqP4_(#OX?U(vFBd2R)jXQyj*It0kmo+B<^<ESkiBct;5ss z&VrCwJD_-gZ78JG^-H&5L$FLOitH!E^?>pGOpnEiZenUt=_AJ9^k(S%)Y&Kk$CPt!e-y( zv3Bc30x85q-Vyr&Q-JnBOaoH%gI{=38)7>WdloFlON!1^W4PLv*I01|`Lxr+=;=W&nfD^*o zFaaelsu&&uYGEfBgSW-w`b)1_K@E~68a;$l+7~=`zq9hoT0dkN0fMbg8-5f&xkc^Oxg~00#f9yt z(aO2^S0<5;jk_pP^g?PCG=8 zWTr2a82-c!?+~svp7fgQsn>;+l`R-qu!fpRr2U4XA}d*cq$r)@TR+SKaf zKrHqbjF!C)M75`2j%?pJZLul!Sv0+yXlW&LhaF19Z(XD&y1XtvNKf#V4E}nxVHQGF5F!?VR z+2J$^fD^h!eFa0TUbeDD1E}xVHRXMuVW)Da2&v{pa4H|)BHTHI0BttzRr#35hBFi1bfy3>#7Vf4!yG85F6jCvHxQMx8mvsHHMf|!pdD-_9p?WdG2`^ zx;CG5O*oA1w?G^v8l(5FcR8=&nC-@1^IjxQljCd!+c18lFq#ZS;!G7_vW2NMv=E@% zbe1b3Xu#+~8wuI9;^L)8wIYXIU*};6iQ_IeI`K>a^v%&WZ~Bj$O_M>9(7#MliT)ht-fg!#5an#Xaw?=fX*GVK*$OZm5vx9!q~e!~U36UuYRWzaGc(N{-2 zuZjHVr8H*zej=`Gzz60zBdI7DH~0%X8&sfgHqX?j{`b+)M$tB^`%Te!Ck`fiIj;V; zX-#Xw(bahA^%zY0Q%T-9NgL-kdNO9&TWn@?r=iD5w;Is%H$#7T|CsV#WS3Vpi3wyt zIbrPw!h6HtqIoB&AGPU!_=Qd;pHrxDMhL<=tWT)D!g%ch6$T;^FsBIU?iitbi5;;Y z(Tm3nkQB`YjI1`RR&>+PYJWQu(uTSGKDjnO;FM43dRw%GPhsxq(i!vSK+e`?NM90?&X&MNw0!F<^-7c|ZWM;u^oBh9TYwl-at{;lL zZ{}~3vWlhw(v?gIPyw`7zvHF5`ml;!fl)v7LE3)Rase4fFji7Ab6!3P4tfPz2oujH{ILJ*1eA+CaSo}-#>J5j zz|Pd%K2!tn%{^pf8KFm2XQp$>V}$6_n=4n!F@{)GpU#a39->cLin2t*gJ+%hwX*at zW+%>{XFql`L1I1N+}7b!?`C_Ufs;k``31}wr;J4(ZYHhXUaWEjUW+N;t|XaV`h35x zekA#XwQ=zy!aI=*n7`56t>xLA!AnJoTt|~d`$?tW%O>B=1!>;ZMmLRxVLk8AnQw0_ z>Kd0NBI_=_4-xRZjD{snOUCYLl!MF8SUXYthZkc`?jFA8y?*V zsN$pWH(mNG3mh~T%gw!Vc+T_LT|H%90T{}^X;eNRX{n7T6|?a z>~8MQdaPf+6v8uw&-U&OnmbT$9wV-^UBX(>jlTAkManA3m9azK$e=^{A2Uq$$qnpS z#A2vSTZ?7C658KzdE{u(`$$+#E7!rd3oR8Yo@;N}xJ}q-=08TBY!6VBof5Iz^5CBj zx7}u-zx57oHYD|i3zVz~JuWJdR>H8(Jy*GEMsYdUqC|0qe4F2IXnD>5CXe=WZQv>+ zt6T@v$wus{zr$g>QfM`(nEdF!u2)y|Jg(g zU^>D(MbkoMbYypw;*c+EWa$wi6R)o&UnxnZ+2-u5WC&5s)=%0o<8a;dE!#w6 z!szhKa%3PQBsYEY)+rb&dYDslJi64U53)i1?5?I`5!sKFd{` zvh4`BWKArhz~nLvBk4gIB+0jx7oJ6islalKHRE7A*kf?iszpcmNN;(?{NIm32W0%N z4dv(?A=%GIJoE8q=?ANROmMXgAd03%&xZB3mOz28iGIw{+#zDB+mqWX?C0_vqW= zw@=*m=i+QatG>+;cw5wFcx5%kY$lO()Hz18=5i?90zLOt$XZKEGpWQab!bwe7)5zK z7^>TChJH1278uuVLGGBOMTmbBzjpiQBjN?)y%nAF?_a*Eo&CvwWI!w$9Ry~>ic2^w 
z6+W2&+JSyI*A1kkO@V?4{uj=@r?d*w09n?;3$O%HLGSoD)Y`Ft zUy#O^DtB*jCt}&msJm?{yQf=5)D5vGFx?onyB#z8$0N=5c4xssnSqWB(ENCV?9&I~ zfs~>}R}J6ScY0s{O9urBS||v49*e7@LxHB)NKA!tDMZ(L{B^BKIz1k@r8Q6C8jT>E1BR%G`;CZ}`YA14_ z&&xCcdqV&cqHlcTvhM_~UfX^KL#ZC^M(zkIq5y9132 z-(On(y!7&v$k&@&G2aW1N1IGXzVUx|Jec_zp&^O+v9oI10r)y3ju(C`iwP}*3u+5( z>LF_Lz}^%`=7|H=*_tN;f|iu&wcsgjIxoB^iD6C5fZj#J%|YUl7y|^G-yTApf0Bb=nQMK8>F?ha0SD~o1Ii3u@piJ_qu zu&K0bfDBN>OS0IX>}9Oc`11{BFG~Hp+(k1RkAQ?)6AJ%+8+fP=*0rCpT34q}pNfPG z=u3bq=(hs4fSDGcZTpJdPVzkCr!^BqT7&pySAgM?Vb8*rFN?Kd&NvA;(SO>)%p5zx z$%>S6B+mxEg8o}2cy?!HINhc<+3|jQu zGYK~MQv*uZysIvCYd#@AP;Dvx$AEN(cCX;b^zsYsuilc*yg>8Q7BTGx^{KXxBn^J^ zHG%3UZekRPjWfCU--^!wrEZLrMU)j!f8nqRRVZ* zpY1st$YVZvJa+zv!nS4ItuJNF6|y~-1!^?9VJ&}680UB6BHHw?N*iYA(}j^upJn7{ zBas~B8te9p*>BK1Zty=ruDmO?U0FRpP9}aL26%>_BFQYc#w9Z%7QV8T?l(tcPCy>y z001S3V`z2MM9K>Sfd}K2;)z~L^aRRREIjBJ(Q=5)jyhS4`I~Y^GdvpO9HP8D5Ds&B zrT2ianfQo|D(KQtbTu%unv-KzrIl@?gaH+_mkc`l2~A5yiW^ zWhdP8^B^P@~dP;95fsuxrS}gkSm<-@^_3(iB$Eo^$Y!+T1)Xes5I# zTQj2QjI!eOPFu7{WgO6xU7Bp)g9#uX!Jx56%em0B)Ig_Zs-!nlu`J zRXAomv)4~U0sa17yjBSaj75n}%r1-pl3uNYQ_0rl2?W@<6o}`>yz< z3F&>#;EX2V3MYHf_VJEP>*zW8hMai?4PrCtgYciGNdYrOR73@#N%u^3hU z&FC>3nl3C#RKe3{2#5YNO;Cpni@6fVrX`IBqMJr z1M3|YOPogTJ`1+EGeXWkKu+FL+?J)w@lc!H2{?;i0CkKp`9F$*3OMky`|%ACka<^wP?1qUy%UcP?gR6F!+6@v2}B~M?<(D|;xNiU06D5rBM@+|0$MyPqVCHY=(F9Y;{m##^+C@G3X9Voe1dRtMH6HYY$z79w zVzhB0B%!pH^?eSk!ZV1o`rn7s+ROw6f=^qy;AOkW=n}I0ih+QNzsG`!=q*Y7B9Sy) ze6jQJ!8O9yyW|sW1aIMAhGLqjgTMCc7KAavk>=M5r9N6=pUPoBvcor4UanI2L~sXo zR}dlVz}gaiU_c4(a;furI?U$`I!<`AVYKn{h015?755Fb4eASLx7w5uji3e^U_`xu z*>#Ou(o$wxBC((Uefv^7dLzf2p7Qy3Xj9?uQBW$S!^70(*GOl$QnMqXB{iu+iA$&Q zZ19;P+h^8x)v32$QwrRgX0*?knN1xGj=j}Z$}`2aGv)S_kBeeRZS4D+7H(X^ev(^- zzDn`$D;IRNw8B|}Q#OlExWpT-6sfo2MKD0>%^&#b?Jf+9`&dvL3f$d30A+xqc{1rP zf-pXM0C||uR{Cp|6Etq4_MGwC5vLKcg=0`|Q$~#!l)CnzP?L@8E7#luW;+NR$37rL1Q< zrkKFtKQs3z_C>O{qJ~3KGxL#43w0s!(FO(Y`s_OVtq28|dJrb$vsJIL+ALeEg90@@ zc*5~T{nI}#dX|fRZ28viv0VWFW4;jsw$#Obke~6)UjY?@>n$ioRy(eSO41X^ew_)|^t~!sla#i4T|nCBX*{5b*gh4}?jwe3q;%R~xASZ~n~a=j9)^t- zfQ3EJ6ptnYlX#$cvU#i&-@aA+?sP|tPa{ZxKn&=|3lKVl)bEto^LN>S_)LtkE>s|4 z_z|;DXdpA^{V_@stBu*~5T#3^J^N5HyI?>puIn)!EXTj73tgI?MITI~mn`sy`wjkL zdw(anF&jI`?JH*+I-0-zm8G*mFKiuyNx+7`&(Y<9segZNq5>*4U%E^)H6j$~|B`4_ixwod`%te=U7+MMCF1tbQ(6V}xuJRFENrKi(Aqj;O+d zqe35eN(1iba{_K94}5~XO+u8R;>$lqWQ3mWD_jA41N~jiBzR9J&N^HyJVF&&d~oPd zX-S{ksF=-$R&XTL&%G_8IUUxx$BqMJG^k?*vNDUGuCFuUS(}gYNoakyOfPKA1?_1{ z9EGl#tbg2Gq7t~is}eSjUNN)Y;iR%c4FAp#um!oTNff2Z%v6Fys7+l)KDeE7!dv8_ zJKc|BvCEM6_7$qIBwRe=eI|9B4m{_jmaCOLzf4`v-jq-Dp~C1lJ1QMrtAJW5xcVh} zi8fGT^#=QWZ#mpq_weSZJG~M@0+axmFqSY!}8(w=o8IDEvLiuOB153{XKF z(CF7h(=eX>YS^$g-L#Z}3h0SzV^~Nx9(gF?dcZs@nUnkd<(*)F2IA)@hIf=M=XL<@ zf2R=H>5Vt-L#X@;@O9d<^3Q!|XK}nB)WzU-xtEQ(OO>ZB>osvS`VrZ64qM3nAv7u$ z()Y+pH`+`P-E~VCI!eqHy(yEsKzMWnZmwYD z1a6h_tj*F2%W3j{O$3cP^{p?;^od?PX^NP&@BHA2@c8vrHi^~*&m6q=2BUqQarkv< zWxFAW1^h)^zIC0e4lo*v#iR-UpSmtquf2lE)RciN5 zJbFAAB2~>E#y41l&&c{@-SNs57w80w;4tuKgYozLP=00Q4-W=^Sk@r4x9Hl<;n_&( z_G66ZqWov{P$50~Yz~cR<*xF1oVa=y55@-MzWf`#$#LTXn!&%wRq+YzB=83rNsff3 zNb8^n!x6L;xz0WZl(?EAw=L+U>ht}6fABG*yNaTV5*E`tj2#N@k;+o<-XXKk zK4Q`Q_wxR5`}SV74(Wq_hs=PMcxGOF)=tZH}L(^P^=jgvi+_bB4fxD^^Co?8Y-^rh+ws zi#J6O0>Th8H9kvbx1U_~fDNPU^zMG4C-Q>tI?zohv$F!ZgmeO0dI=-@o$THP=CF2CG>g>HQ_8y2H_8MgM|4K6tl^F%98ZPOgf9R+Xh^ z-ivr1OsvdTnTj9ViQ`dNVLqB|=qjihp1O8JmF z*`4pDVrkENc_Q{1!!I<;STo6BPJPSp1YD_;^Rr4N4GW#LOQJoi!3O8{MLOOWvxk4z zV*VYfP-Cj|r0DGOdw9;0wqC3irV1 ztG9THK*T{fUYLRkno34O^{Mq@D;|g-;5MB!JjrP6gxi>Wk(5hRH;wMV)Cjn&xMeB(-WZW6VOg9#xb6h1@92-k4m%(>eXrxN zx*Mr5wHp)Kb$C!|2iXfSpy(`-X1}a*Xu)3lgyW`qw-cewC?iJzi$*) 
z&4#Rd^&{#2YT_rwrVjb0cQ1H;WVVd9vj(S<^g6gp<4FX)rh-R|{(;$yV_0rXXt%te z!JtiU->GB5+4u1;@C-iYWnp4HYNrAugW4`#O#V!`Z9~7i3%2BW=pkkNFl2NKu{VsM z4x9jFF!Yxs+E23f1hU`*!3Ks4qNE2D=oJ#8qPCk^k-c{Uo)&d^*jcE{+{O;^zG zFlnxioU1;?d%)eKc2Polm!Rh;Y^^g8Z!3iC1R7$oBCIh59MjsVWtZZ@Rx`*Ngj3Iw z6gyVl8=tJrx`6E_XjQ~xB0ZlvO+j8941I-NzM_{8T!+X zQ;nB-2k*o`ziQo=1aOEr(POusO%0sf!NcL~5Eo?{gy})FWdyAYU;M}d>-NVmOMOeH z`z;ABJ2LeykQPs_`=&V|5&e&HS$TuIQ*%|FP9Q!&OJtSVO;F@fv9h5{4*_YM^lyz8 z_5H4eN&z$>X1fy6IP}ypYe|(UYQfHXV=y}g8D36#%duw9Zbmk_X1u!c#e_faS4wSM zxwF`lo210=ZCyurA{VrJZWq!RZunoasR-bqdHh+hlRf9(k}Eg5e&b^F316!NABxrM zo8fnQW_t}2{rVIBje!D^5c>=|a*&?hRQGcjMaso@CXihBo!t20rscre-tP}`- zus0DedNAM1?weotZA7#qbKWBL#hpYf^V=?1{#vHAo&JE#E9_02cTrBuLEO@TPLh;0 zx;>4Yo7AGK_CcX(G=_3iH;QOajKX zmi$_8k2P^nUS9ZjC9F-n7aHji~a6X zg4*v8`R4%5VZol)0Z(DzpLl;|5*TywC|zhTiD5hj;iqQgVOYrsJk}X%CsvewUPfk zry2#I-@ zkUb%lF;hvhPFX84Aqg=e+2Um1lYNW9V62(3&6#m#&i8yCzsK({fB0jDbME`T?$`Bt zzAn-->a>AsV=VTG-J1Dl50UZmtmK@~g1(KPVt7l`8GtTPY%-OM&o;-ir_gJEBV`qR zDHtYad#PVOi>Fdkp3~LVDH|vxxZ=73`f%SdjNNB(&RQ`HW;`9s5gk|70UVx$j3cW$ z!h}x-HuEgj|5`fbC2?PgXBXxIfkE(Qi+a;|o^Yf}Hy9aWD! zt?Qj7F7H|@$C~uHR&s>a#`9$&$E zhKNf9IUtQ7=?g&PyOBcED=0lYk8MV4q*e-gCBu%;oVSAe#>IhD4ii@5D2BxM^Z%f_ z%jhd{bNU1cWH==mshKu@!n^VU|2c<#qUUsNB+>{Kiz6wp&8$vy$^op;F+qUYjFFL~ zs&4&QV^pt#tzG%fS#q|P3uTpo&Ryj91Xo<7Eeh`ekCG!8n+)a6ifMOMM7E}OX*aUf zl%kE~LJ_P%ZSGs39`o0S9@c*5^E$P?2UU~xNu;k-&_lDiD6+sZ;vKmxq5%IL?E)M?dd#x};AL`7m}_8+d(?CMPa~K2 zmt}B<|3%tk=7&Aaee0!vsa%cY{1x6_ob-!+B>(yoPHXtu6Mj%sI1DU~-Pa zxbO$M$CVeFxUmoD0))2Lu>&RXQUv14m1Bf^@Kd7wE{r|3K03kU>x3N(O+1}vm~oqk z;_9(`ZqI+kI;HD9p1_a~j&kH(-nB zVMPDu4n03cmj=mwdhA%v@rN@2+Xu>Y&Bfj&a(>-zjJP&ge?s}0ODJ0}G{ig~i-Fhrwchg;Ed&oTW8wb!re@`Htl@7b37 z9Q}WH-?78p+ayED*9J$y2a-O5=BM8{z&hN+gdA3Vqa*>l@m^=%;Qgp?g$G2wQXRKq z&-foULn)TvD)!QGw8LGb2*kOEyY(G-kTqbauFiSAX^(8KMc_kCjPj^1QyOn73(3Kq zz$m=a9yYQ*;->Z0WqsuldP6Hlk_+Bdz7Wa1(re_VtND)kJh_E7^S#G`e{1^QT-|t= zzut60pmxhH&mGuLyl(tJ$lqGAAIWwDf?9lr?}Z+5Q#2oBx7^e%Q)!yX-7$aov1fgH zA6w#;HeD8ElsrtI)~R`;cqIur1?BB{x?qP-hGLN>fvufOCztEWPCvgA9*0qX`(SOm zmEjS*uk1D3U^2z*_Mri(FfvkRFw+#eN@kJPTn_4e+GNg`trC~2VG(TZh4UYyP;S=m zGQXi@!RGv}H9wy1hN#GU=(mW+Oa9wK+}1l9r;T4rw%hHu^F|Q57 zQQ&F+M|ZlS-m&4h9PxMLq@5S)XLJUAABBfJ*wY&vuEC_PTy zRxFAuGKw1blQ;cvl@;;mpED4*oU>Ze;0NpABV0@4cd;1RWXO8M+{}XN=|i#w4Y+iQyy4k7 z?&9NRRGIItH93afpkhD?>L8u63~hegA~)2zzB_o-5H(M(sHFoGcAvzj4m&UMQ46!I zcGh0e!>ne=ku^_?h!{mD!#>DHjrqzNocsKrPYPsPjQ7Z{yMDw^6* zFLmAfhUD&PW5Ze9JlJLQDBIk#fp%V`(t{O-G2PiO(QNG8KGfgis%QF=o0HLIK40Rv z2k^o*6s`o!T_^5jb&D+6Z=-Y4m6>$E$*8_UZ%D`s_0q|&=uLr)gs87C=d?QKrJz>N zRYiWaYnf)E90dj{zZCeSJEm_oysUVQeR|N5W#ax{$9k$b*-vQD11($y@ zeu3_raCuSXFXq^_f&_3o5{^C`e?8;co*gnKqtC&zefYr@= z9#89lc4U8kkdlq$MZYboxt==|?M)Cxxs4lZr+)ld6WXD7k{hySo&Ks!NDkH4wlikt z!Qh4AFG6y@p$iG0mj8zc+pfF)*GuR^DN@CEW}kU{xXG-&u0n{d#xoZ~@cM`pm!I^} zGR}D(=p9l^HC*v^x~0Ga%;Q z)`1y;qtbQnl4&#)AvwT-+b;Qk9R$UO3E!l|iM*lSDqGthE53D)KAM<%dsr#*+NR>2 zpM@ena-BQ=B)7;sS4yIjN`l@~t3=O(&BT|Y(4jwW2lh-Pu01_3(ep)nf6D`7Gsu!A zUPC%=_50`HJuSt*>=tfvCn7KlM!Szozaig%I~a|FS6S^fF*ANJuZ=-^yXySkPFdtt z9Wu6>5|uZ;+exAO#BW7b8V``ZE2ThGRvWL)LeFNl1y&T@nnk_CFnr}D%x1%}@Avue zki!si9Sp!X5SoeGCfbzj&mDP755d2Ot3$Q%Uvl1?bCWWqyouM-KA-BJ7T{jsZq-|s zBI2k!EEO+?a&z?FCFJea$_QR`)o%pdhO02g%3Fz&Yg24h&*+=4fLA)P*m^?BnotM6 zkr+Qf9aP)E3hXzD1X9uIFk%~P$PlC|B+oJVGzo^n9i^R*rfBg@tUUW318v6N!KboX z)fm{$W+8L2&B#ZJoVNH;sAqbGMk{PQ+}BL=4gJ>6o!Ot)hOPHo>l(-R+#>>5)Kd!W z2rmx8$wR5DyhxN2^8TATsF#TRs9D@lyb*M?5kGWj&?@-3n!(n~zE8n66$6)!tD2Q5 z4V&7Tz3H)=xoccCml*ZCTd%w@Q@>giSE`iXBgCVe`uKDB{g9#kh|I--k4@_F)@COZ z%HNn4HP|iY6%fvO2}$_BXpQ)^NsWHD+al{t+Tc6*mlpn(dnEaY=P1GV?3k8Js|NS6 
zrwOlqRNHgXgz60Gm3lF&*8Td((D~V~QtNj~cz%4?uD19+ZD~(X@$64Od2GXtc|dfm zMFxmX1lPV}sJ)KoZwx)nVYH}TldE)E%j)SgWVTugv{9auoIy8BK_@T9d-P8Mb&(Uye7von(80!?1vn=r=OhY(b2SxCyejBTTBn>>uZu@UypI?#0 zM0Up~4L808{Jw&pS`P&&vB1x-Wavucb@42IMM^wVunJQd9|iJ#VBf`j+2c`1rpsnWG1=*`-ALg`Jhn zVj~U%b$JaLC5&(6B77*C} zAlDQMW@+Zf&`VxpjgO#x>=!bV`Xk?WBpun>h(78z+#isW8!G!;IZF#kLyx9i!CyFE z8PbG|PDq3E5M88YGn7Wc@{N6CAFGzJ`{&D5H%r1W?R=?e94hu3>7L@I!%Xm68fTr| zc=WX&h_BV*%SO{k>U1*OEa0yj`YD7dw9~#298ZA!x{&u6kLzvA8~U(VSOLSm$VF;whvCSO#J!PqAB`e%l#Pqv~TK97?Bi*+j+q^KB zEq`8En(wDy^6VS4PO=1L+HiJPq(9o?%y`Gc#h}Xm=Kthf&JhMLZN9y7fFgcHD7o>u z2+R7s)0vVc%XGK%c^|##@|p>lcc$-JEsd#FKZ|QkFqMLd&Pu1Uhfdyb_*X4CQ+)S3 z0Odqh!2e>}pr=Nb{W8%9yv)eovl9;htJbEGSImPB;3wTPutS49HnmJ2k6U-LuHX9x zPjpV(O@`yg1VpUs-w6Jkkp3qgd3)F>*{C<0v`yN-k$B``6&;{9A6-Xth8UK&i*%15 zo*#a{_HFVe$^2$T!e+tJuR5|>0)}s;8dekf*9d%(wucpWw@3yCD40|=T~wmXc|Sg7 z%F|y=xD8~tCtB|jeXX8~3aFD{krr$xdLT{M=%g_L3 zmLh>$=n7LK0Rl*MA18$*d1MiLL-F0&n3P(}7ZH@`ej^NR1GRUDIZ8thP-GcfyN%Gq_$b)``*cn4-W%J17w>i#fEh&}TADqcD2 zahQk$4HA7MPpCIJ@R0XQBwVasX_7D*q1GJ{<-OWV4`Bs$cDG_*+??z!<{lhHQsJIB zwU+FsOotAEUoyWT2P&C0oCs>39rUXg1IESg2L#|}p)|Pi*4*H#btX#cKirAZXJa1h z{aj!CmCufI^g(u7JJ^diYo3M4Gpf+tjwRlAmlx>LglG4|h5tNuMIuJr8%;XW*h%*T zILS?eu>FT|t`0pqS7hH2?V5jst08CudMX1K7CK3L1@EYeM;FQ@c!TG2$t-5e|ET=t za3e_`rDhp|8g;|ZU$JvDgP4% z+G`2bo0-x<_}{1VU1Gq`C)NU-{!O|Y7#`bVFwfH|y(7J^V;THfxRSjZ%&%_m81u=w zl8gS{-6iLGSnZMgcjAgyh1B~p1UF)4Uwu0HaEgDd_S;1M`%PJab$u;Q`xQ>cC5&Xv zNZ?w_308mKvspddE`x~1zT{&IvtyGA1QC-~+6&c#!HP+L4<}vD77DE81lDYRc_DB? zFW=~LF6NtHoM1_~h|J|%C;+uAu7#hwmF@csoFJS4-Gp|bN&e*H@{I)*RFuHZKHXEC zD~x}E72pxZOM!>4C$+ddqC3*$3&OlZ%de~?)&F^pVStX@CV>&9A`hk2R7~iF*yWv} zQYMXK!}m;N%(<<2shFKb9|!z$CypA~UX3x1{naIk_o(%}HH5+kzQbtD*&E1wL;3-= zIW$AZ%qqD-bnp!T8+FC!wUVU_ZL(Sjf~Y%8>#V--{qqEKX z#rKNK1S$He7$C}vndHko8kNr9otpG7&jj#M4xo|sAk`cnr+|j6Rv2(FJao^i&U13=m)o(|B^`0s6?n4)KB?1=-jv9Qd;W2kteyBp;tP`U z-uL%o2F%jhyl@59;kOv_X<0uCLsrZlF@L|4@1*VCTdnE@r|GKAokyO(9eWlyro=uXbAn)R`$ch;97 zgnKPO#azGlcSsU|2ad1Wpiv0|hVEYYCV?@YhhjhoyZL#7L;DV(hPOAhlw{W~~@ykjZns8dE%nG)<`sewwr$xTcHs*BC1yh+Lx^^&&?Yeq1mgqIKl_FP#Kip%HCP_)|jt=yOjaE(2YNo7Y^-n;U1aq^W`4d2`|QH83L%2fwfLUeFw{c@Pc zqebj^#QT(`!j|P$p&U!nY~4!X7lQ}uhxd22Ctm!e;(OOZD~8Ogf+i)blc(0SEt0Te zp0Q_-O#{1XnD?HO^z_#Ht{#g%4J-N3QkO)bAHny`ME{9MP! zlP)QPjIn;<^8rntrsxz!wC37~0^Tmsv43S5KC7y5W)nhP1RJDve_rfEH_}J&4C7SX zi*;=R%5pVfKM)LSm}^2)hu0+dVp##N>^l(53)b3Uf|jS+u<~NGxi=hi>i1sc&Q-(N zF1sE!nh`}Fm_D*t?R(dsr2BNgUu8qIW&%_2hsG8C!-_XV+r0ff_M2Vay?reGvX#gL z-Px?MOgZ1i-WOnw@dC1RSh ze(`~A*lEpA3<+)BromPw zAOR_|g4cSNa)t?TyF}>3K15{il%F(rAF@1ku>>PQp<_|R)}1QHMSEK=OW?u<35?DX z<^8x0z7+b~*kz3Kf?_dQBlw4GJC8?ET~4@Ex`E+yl+I2dxseXec=X@l_Ra?z8V)e> z|M#`fgmEhQana=g*5q?^51dL%<3OK#v}e0ce+whcf{BN@in~|$?s;PsXYLQDEm7D$ z|FiO@Z0)$Cux6?9ne{jj=p>&ibwM&>u|fFNi9+;u0{<@0+KJtP4LYly`|jAj(J0nY zRvw{xqytFjpFF?I^>v-txa>sv@!&hk2{(IKtM7McA6cK3F8#t=qdmCZf5KNyAdUWW zCvp=0epTbZz5nFxl*d+lHZ?Pc zYMxBwD6|;1c6o^4{_Ek?ou3u}gpE@a1pl3CPpLin)NdG$+1q1+KXYc75tb^y|DBRb z!D76AoGv~AKz++~gy~>vrZD$*`}f2955?YEl3P`@?N5dK?l=D2b@v$`(5+h6W{8f* zG{;325PozW12$WQ6hES_KI;cP7QrG{ZZ;!{MweC2l=r>U5BQ4;@cwS=NzWYAX|dZC zaz&P!*|v(;B)QMDy#+nmEgnQgdjP+SHp$N1N$^|t|6uNAjg%5ZFHzp+RJy(<+Qm?p zBp6(Me7fb_uQR75DB`MTpT;m!x_d^~(}5OMR6d{WD&_%Y(I5C1QG4B-uxPc(`c2X* zTi5)s@#bCNTt5ossrE8-%niDV&*mns-AZJn$i4Y4F)iox_w(RK&!0+n+ViYj6BS+h z__HKahVJ5=Ty7X#BSV8{FaPttN=JUT^yXS2 zzM9SddSYZ(laCHd8rlv${`k!a@?wpdv)ypjzo*Nl*PTQLA;I6#?91E<=4uy1wivMZ zF+~W+>VdpyFPL6XesP1pCN5EX;~?!tBw?mF;haKV(R<=4gf-Iji87AwQy)3|0?7`2 ziRc2OY@f}nlQ!uZnsWjV0z07d2bPdI_A-OEDFn&ZNF*MOmwj1MgrsdoPHV8bQjode zNXoQbb1*fc;M&PLH~5wMU*Qtf&E8RWB0$SYA=C*VlP0Tw?5%o#;Jb$=H%l6TN_;R? 
z>5W=Iff$Awz8aMd3<9&r+_NqNGQ~#-Bk=o)NWq?fMA#U{&i4z@;*H|w+G9H**Q%1| zy;r9B>~*EhimI(N(Z{EbPdq@GrCj*fthsMKACP@4-e&b+FQIj=SKYGrz0VqEa`3uX z@2P;4>@L=#)#cJDVoRmPQ~YSdy`w;fVfhrW4-$m<@K$f_aE*CkW`gTBn$p%2U}p!1P~6~Cb+_RgL}#d_bEUCy3Tr~Jlm!L9l}hfZpL2XYMUlS zguLkH4PB~n13#m5jur$)Gvv@dtR_}zDnXrrUN)t7>&`I*bd#gDH?@o8=gXGJkzoAE z4r0HhYFHCj{bxB@!H6drOVqL@?ClCFO~wtZJ*82ryBgy0u6+x(n;voHE*shSEZ%n2 zZEP-7Sl}Y&%bk^d$=%f1=V1tVKXqVIQJuj#CZ0ITmU{MMv{4DlNxsN^ zYg3<)0$j0r7@5Azjb`d`BoE*43YQth< z9K77T^W>3Z^!>UfFD}bzEuY1UcU~cXWPDXcC%_acjrLUy9OUid%QIM3!nOD zH-C}DMb7zYo`@Phqg%`(y>~=aU`e}jLX_@wZ)~t$gQ}p+WER+b_WKR`YY6} zY{+F(-n$(9or2bCHsc)n^?x*-cQ~8x8}`4gmKv?ro>ir`qBfPNSu<6e5~Eg8tEic( zRlBuU#a6p^YsKDM(Had>TSFGeR`WaTks^WFO(zf^11AN^Tiq>Adjn2QyE!p&D z3)d@ehfV=~!iKlIbD+(UrUu$YwKmarB!iNg^i9a*_}2iZe}?xi;!L7_^M*~%&mN}i zsPgDf@^u^pK{lq<8g{7P%o${>GpK35b@(orYair3TDlM4fkqq71-gozE-BZ$0#yNB z_z0uy7AMMF)Y8q+O9*FsR5>bj5@#T1pkVhTDV_z$0y0Wz#3YOHargZ!4?X){^i=-Y zZR~wlGqW_E(`!vU_vE#BwGP?RW>=dvUcV5(cb7k|?AkNB!IBw<;J;BOj6AZ8bDYHV zzNzw)q5Fbba%bgqye9+F5=S$|)#XL>J8miaZk09kxQ&g`P4Y`CIs9!Co|SSN)?j+?E^YWJ=Hn#$;_H!b3JvC6 zRzr{t!PAEMMJ05_aZiPI<%aMXRpyakcs#}+N4pBX^r78l41#=un_TLbyJB-Z!M}8*btigJ2T~_ zG7mSYQsi6Uwm$l~^QJz>3+=wn>8AQJo!lGvy!b`*bP2DM_3eZ~N<@5;ZY(Aj;m-n; zjlNrAiK+EXeD2_=N36ie6RwdI$Q$7X4+EH=((q@nn10u%({CX2S=bw&2>RyF$bBAg zAXz1ygP~(BdFg?2GAq}C)flc!Zk~)(K0;n6)q-x4z7PxSlw;~yW=U)?!&e{8Q$s7b zmbwS%H{wcEoo$_-^Ffqs0Y(WI8#;a%AvcB*`a81Pxa&#@Z#2`=jrMEcbLARoG~?*z z#Xj;3GcR*3`2@M?XmwxZKa#l(oSFPD9u`2y$qvT)cvyY$bPLciM~Z^gk0Yf%@urLU zyb1St=G+kqylNexfB5eL@O2Bfn2=B*9qy2X5w++xJoYgu^IVtwF_VaEy}yC(dh!e@ zJ7aXqNZR~f!+eJB#p6Mjft`($vjv_#fVQh27R$pY9Q$65)0A)6ADEn|{%-K7=_6kRvbEfoeQOa_>c#(rzpLz5>`BJ%N6FbNVX2@5Dsr!?30|euS2gtYOO{{0L$M%(i~bKJ8rj$@nPt#+MyXy;Sm?<>jg}gcK<5%gA z0)PsBE(Pu3cR92&f)?Ns>lmhy^n$|^FAJl4^5KJ!h10e6Dx!r|o#=8iQl<0k3(4Prv%Gy;LBpeQ zs$==o#F&yEw@YEQSCaE#RN8>l0`tt~Jp49vYdH77AL?wBIXAr4oHef(R#O2wSt!5P z_+{hlNw1icQw&ds`lxn=dh$2bh?&T$q7G>Izk9R=% z;+-FJcMME80VFkmWGwM@^CCv^=e`%uy&*kEH~NAk*gTwG$I$t;5swMvGm40Nb zb*aas0b}w&$roJPU*JY)W!J@I_BDag(&5HqmP|_-LOXyHGu5N5QTPzJYwO#@DVQLA zOO(_sS1iq3`Pu-h4r~}T*Y*@m|7w0DV$i>XHxD{Xe2_`|D11>MguJSM9u zSrvN)tBc{c zkJ-g~ZSat3Wm6=>T+n(wj+P}%pFI02t#ot56jY^<;;zQK+SB7R9+{3hBtIVid0$G9 z1NOcA$0#pB=s$(cSPnGYM?)IH{e^vC#GYxXfqk7NNkeFB(AhVwLc-9yBsy$A#n9lYR zg|^2}Bp!H6+P8V`L6;$}hx_v(!ZPyL-_upCz1Rr!F7T6E`+=}owgKjlN@U^v0qO&s zY(}If40|NiTbAe?awWiArRLMvwuUu+sMow={3*806b*hv0j<{dLM{B{TwB%fZe2`q z`*ht}_Q3S9ePJ9Z668<2mEdwVOoX!i7b6oT+JGHy)7`Qj$BjiSxn{!3t3GCIp& zPBoM8w!e6fqJZ$joS{+APexZ~L!0L;g?$hs(Syr^<(T63BjsaZlU~p1N%O?D{rWJL zj{%mzDXbQ(aen%wpp2LVvr9c?05iiSS-@eQgmhRxDI3%vuQiJ#kH%yEx4O)e;-^uV zda>O%PM1k=b)Y16T=(___<3MjL_}ZOujhh`Qn60Gij!> ze)Ilh-S6)r2wnoVu39<70s;>EjcEqUL(@7$^4f8)9(;QK1g4Q#eFy)TB#@f2KVM?e zYfJu7kG|_n8#w6P`!>0SP#-mBe_`n7EKMI|_iK~?^)<`h&`+4Z{w>0ZS$Fm~e@XpK zIe^^mdD+`f8eInT96#@q?C5^*t^5RhOLcd%!tnvO1p{#^{0z zIRuo^(#(XxgHY7Y74Xe|B)*L;6V&B$0nAc)5GS%kC?|Sclmw|o>#n>>h3!orJQBUE zRN7;_q(!I-Uh46jX6f$nkmeeRlktNoyZe(mq`u*i!5_k;qg|fRil)<>A8anS5cSh) zBnlXl2H`IF-4tN`W>5bz#EOTNJMJQySVyAA%%iXP=An>BVyLv##*I%lwtrVrV5nc% zF`9WSpqIF=@I#f}`97AO_;)&RG>&pq%=3H`kg#EoXja< z;Yg?TUoXUlemBZ3>ho?2h)tH%uyP_kLVHO?oZz`Fr%((zIyaB_)0kDqdfyu*l%dN* zC%*mCB+A+Q@N$M+QmH6jY~G$(-Iq*nLAWFP{iVsS%=5zT*J;ZJ?s#pQwY5{0d_y*H zG*qS0>a@51nfH|K5Td8g*Z&pCf{MBX6to0+JE{WCA5IB>q6vr|92zr=H1~H<_QZlL zt)M#SMbEXVC@w&G)0jInC*ipCnaB4YO6av$4u&C?Hq0Qi&5KR*enGJi0NI5O=Q&<> z#hGLp4ZCaOp{e|% zr%6eoVJF-|3+C;b$ChEfW6_!$^kM6fAO;!TN6AsirdDm;E-x4x1cj%4S+i;c`Re<> z3)dFrLnfo@Evs9~C$ck%CPTXN%Zzt-Tabo5Q)7r|o}65w4bQUGE|hpCu=Ih4wT5XW 
zzNUD*hMc`APe}|8T~3YYGXwq#k34rJ+pjhSg6lwWjhq6etcO2#$t9sP=?p{eo}CMs&o3t9aWOBtJOl5O6NqT9Py8}U0G9}g%*&FC6R9N z(?#@+mJ_32aJdCA18V`=xi?2d>F$YmEh;yXz;V_~i+SU_6Vv62<#7!}DfN^7xT;N@ zCUZvRuT`nA<899+c%M>u8qBpPrV%6t#A|U>GOL|%Vk3Y7i1Lpyn3c3KYd5}!^}0fs z1l9`$`6Tw|^+)V_h|D*gf#TW!jt_Pw5bMUJ;(&2Iy6e4m_7wD~?)z7<^P}t5Cb+LR zQ%i*8y03y!F2vfAhWh01Q^@252-G;^Ovf55y|ofs1Z#JOdwx!|A#@~fD9nlKB=?c7 z{>q2>8A4hp34~t?4@pX}N`ap3o%I8xtB&<7vSb@MSFu@f3R&ob?!x^e{O;oT!YM(X z%e&7>BUMCcQaNOE{v0ckL|`J%*n5#%_sQQyJKNu7!tKkrX*trZ%SZ$xDGw*}s08w7 z;Q>KaGY=*j{-`_VG+$*Jq4g;^D5vbht^9Z;|K~gfo6x(jepwb9pyWZaHSBYkemsG7 zZ>CtZvNXAxeEp34(fo%nlXEX*$Y99prmfSslg}UzX0L^d(M-*hvngqrz@hPSJGAG< zK(O0)8=qe+l;8UrXnHL%55q-_1j?Ahf5m79Vfs*~o)mcMcs4tfWH+ll|Yoke(axrAW^xWib0 zzeDRtOp;njv0JbP&-6tV)A>U>mF6!`)*mHbT>bb*XR)e&6jqviIF6?%Phi+BcvmFm1KFoOHt;IX&OLcjz#~FdNAL~#1bB$ zjQ|})43NlVy=jazx&t(l2DsHMu&CJ1;y$0a{LF^4;=YXlIscOXOq=n_%GN&~F2JIA z5jR6)JkSnH=rqL}l$2h@d{|Tikn&;&AD9Q%J1-$*(N^G%D!I|xJHw8INRv<6TQoK_ zBCodbVF$<`@vx*n-#BW5h^dBWkzGzeX%bc3bTatV4dZw*b!DNj*&_^0T*9yu4dBTn z5)wEFrSbW#zlTWQ21Ue9um~6<)z#t{*#dWji=m-ZN)>Bg(Mvj0qOaXwCMS^|FSbbq z9G<+2fw5J1tmpZ9#9`Wdc7_gmw5?PFG5Gvs7bW64ea0qCGZY^KQ&F_Ze6(=l12__D za6J7`45A(M;8u!R4*AKDi-G21iM6ND4W4-f(2)dZCN3gGfm%=ner1v>emVnGMp{QK zkVJ@@q@w8_II3bp`92p@B44XP_*d_VlrZ9be;Z!JmE2XqYdFX7n#|{}P2}O@6oVNf zz&pDIToTVH>YzKMt}`9k-#hhTq-6Yc)XT;*V1YdQTgZ|D+jE$#rNDRJJr~WZal$2T z;q-809JfNS2L>^`4F%1lbMDx4m%T_?eCl^^}$50kzl&=0565)ykQ^|Q)ZWw=O{-aa) zZ;sly?Yu%rIBd>;R!u`4gS}B+vzr2Y9nIt0b#JZkD$72!^zV!QdMOLy1>}Qy+9im)!_b zhyOEEExLI93Qvi*61r`08~QX6muOEB_mcve9FbMeAS9V|XJ6H)(A?7q)SfFe))6TA zBhf10>#MkZpl)o4l`Y#&(90uHS-o>Z8H-WUd;Qx^7fMTN{q|!i5z+cxSI^HRwFt*z zavD6;_5df1Q#T{lELCoPra&f49)N!|^knsyF5G_a#VJdve-d%^;g@T7y&dlVEKM0W z6g>X6j&Pr6X%_n;L2bv%MAUG17zMN5xe2b3(%`8nMOAy7HtqHE0zn;PGvLpSj!Cfl z--9W;z3f} zR!M%9Am^)inFk`Ep3hhaWr(4G?71rIvw8W6N8x+={o2wuOH|Q5?|@6IQaYFQ$A7h? 
zV6CH$V~PsX2pXp2dyqzbw|dto-S?_z={}3d_vxZSPk6HI$^N903g1eI1Dg*Q6giKr zH~FRiVk4<-l3hU#D?yP>cHskDsb25i9P*z(v*niIuTh>vXi!RR#3go1kzNUdqs5|&9VlX0frp9L$Qi>U{TVJZn`gnGce zGaHBB>Nx5t)D6zT4|b(z5+Bm{K_@0sFltu5De^}TH>spAJ9^vBhpsM3h?CQW7%xdq zPz%+CC69z~;u#@92hSQV%UvchMRurv1~ap-pXRo%ul*}VTyeeEvo`r{#wOQc)Y(f| zaT-Bw(l6WIe5Sq^8A6sz>B`(L9lx?GQnzA(1MezKwPY-23)d1FNc7}!ZFG*YFAydE zU@;Byev$U#>Eq17+goAIr;hrSIM15{wufVi1s@i!z{hXm|I2VTs5BH7_;ByR`JXE9 zNH1>AH0_NhW19^97I!~AKsVX&XRaQL9`&cEoJ{))rq>a7o4+L2;wnCi!|L2CpM6WE z?IuL+CxWrD<5uwBDJD$M@79UZN zZ%h_FO(&G583T5 zACkE^b%ASYso0{lcC{!f<4+0M9)n=B?-}!W`Ou}I^^SPM!rA)k#2NBZ4{GoxOhGsw zaFRkBsnM`R6xf}AvVLL1GsR4bK%Q05=I7$8A>v{-%7MhQ&KW2|?y?Oxh|hO&%2)1Au2!dmKf)IRzrlxGW-2S;Pu**KqwNvW)0Xv>~YjUYptv zX0FLCe3|}Nsq(U7ld-z5EivJGqBl*4GM;yq&w2A{=;7O$xI#5&F+Yi5Cv0+{>d$0CiMuJUbYD5OB4 z2FcY>zD8$-y!8=h*RrphVIgV8rr99*?p3Hyo63B&u(rD4jkHd$TT{T?RdJkHjeuX}lAhld44rc5<3UK1=3Is( zh}M@0Bq{RgEOy2klE8Dc*Z+javi0lN4HobxHor4PZm?chUsw@r7)hqk>yazOqLwp& z6%_PR8X<E4Hx03+od^c^b;m50 zx@0eR-sj44PRgo76`Sk(S0Dcu^1w{A^ANI`^7bSSVy8c4?5} z;U`0H!o=ZBV?&dc%ih`OU!JkIx>{i3qj1?IPIsz-+1JN4_1CS{X3t1maBJYWA04TNZ^iHZJ1hpt`d)Wb2D}j zx}$>(*6G{Kd%d|352h$>DQ!}8Px9Ej2Yj_rcteBhAjDu{<^ia}*iDmX8sFtzg#;YkUp(zj75c2cB z<{paEVs%dUWRMz&pZ=}4TEtMJ`xeE1@&4b9Sl#GHO7l)3Vv`>m*KZI1s8$?2V-gdA zy$Lzp%(!inH`L=rqt2tcV0=@tBd&%gG;TN>E$Nu3m~eCl#1GQKzwquSLo z=G}VhzAXlZo3LDDb~mOeG_r)fwswH%1w4AF>ynZVJSN?@PMFz^3l>UJ*!L@dCDrSS z#yf!~MrkYPqq}eCE4$lg7$Km>pFO!ss^a6*Ec-e%$N7%_t z;xrfx1wt8~96s+thlDT2@b~1YicZwH6W(}8FQ-ZCf4Qgl-gv}KI8aQqW7Q4wy2$vFF9pc9DRdex^f)t#s=4!p6f zXr@Wvm7l>6_(MH<&U*wh_kYuAs*tMrW79zS>V?su)>(YVe-J&hzNt%UWZ#Jwm@ktk zdtvz`)am)YI1EvPI8EW-`^;Rk@9>oFof)>CIvZ!CJD{Dk&Zl|W@PlxBm|tSE2FSzG z6dB|G{MR}U=miBkLC&lPn)$*m^<|?Aqh?pOSwArBGlLhx`&l`(JYNh;UiW9QQKff)OnAbWmfV92V2mMJ>v%$&L;nZfmsS>1NF_#96TkeQd9MR~r19E3mS<`$Lhbrx3#` z6sITFux5sPb1f|n)?e{+e@yHubvT56%cPt-UWNqf|%$=o1T}}3wz+oo&X-40t6m46*b3g;NoukCxK=qOcQS=d(xBY z@F8R1vn6#RpJ47^Vp5c6jRx$)lJ3bTiQ4DIqG$04H<5 z${N`yy$6vH3=PvD+v5^Q%otvogUDRTHxF)AoBIQ1g(nIpus6DV1}quRN&5^w!@n1> zF4$uMgwtDSDr^dSoDRQ#B465zlq0LsG<^?OySVx(JQD-Hp! 
z5w43;Nq63FU(aTXxt9ea3$Zj+pZh${f=qXy98Zm>?)<(=#MgRnjqlaSc^w^oNz z4%i$wdSKP02N=fSCZa4>O$0+BJ{#6pL?#WmP{DUChDZ!xtXDso#zYyuSMu6-c@myA zL4LKpt2D?ScnT{WKGCVC3QzptJhjCSA?rcdrO?ZkIqljTMfxvkMucA2LMxsP;yyLq7Ag9%-oMq|i{Ed__rvVO@3d?u6NDOUm(hJ{G$CNp z^~R_^F;hRg_s;h0x^@RC zK;GT>#nSW9w2ZQRJhfmV+r!p6y5IMm_v>BNJU!Y0gWi0d0r za!341rR}fC!-e@$1l#u?V#Isb5L^W~{jWwJh|Rery9H;Ee);ep!xnT%*PwNJv2SJf z{1gj9ZzyY)& zr;d_8V>mlszQLUD@MKJ?#MOqtt+&b9Ti}QrK=-Z?UET@-S?VMk1~L3jzuPrD0T2(;CoHgC4tnsn#jBr0&XFuPMRCDOZf4%XJqydFz!Y_y28Tt zG=EV5cMITZ1m)~Ns~WqcgL@YRiY z*Hk8wONs?etbsYan8zjmNf zDk*PnJH6Ij!PFD{s!nHz3P!+9WDYOY66x{WRm4YyT}KJUjnyr{CTy(z{>4pr`yzzC z4~&Q3zs%cG-B6a04||7|NvyF$SOcY?$6~a6L9E?F0odE{^(^A7>i&s#__Ix@W>X`s zUvLoWKCuZ^83**Jn!3oA>@WE{P%0WV$#s;D>Eh&H!kb0yX`=#v3&)Vob)9Xd z^12}LAQ9Yh`vYopy0oy7Su45mbs)2IyGFc7XeR>sj|G^NyD3$^_t9izz2J&rkNsTW zKU`qw+KQz`8gdBn1kcpB$+I;4l3m{;jUo@UnNo{h*@%J?UL=s}h_qoG-L{t8T!6jrL1F7>+`#j^a&2v!xdnWdAPwEw-$S)bv(LNaHs7|pQ zu&&X*RDKKstO;N?lHS>qFOEc~a0&MMFQh%(L@X%@?>@Nm%yD2xh+HB8qlFbfhg7-n zq~4+924FEZD05QSNjOB zrq-c(DWV{HQugWxiPV);#Dn#YTJm8YM6}tf*Di63llxKPJ+Ol&_mJ-3&vQ|2!k+mZ z;pokwKdUh=|D=py|FHKVk4vB2d?Ddr{c%T!Q^2fE>p{ocMaCSbZ`XWVc2!Pau-d#w z&REI({K-PP0XrUNN@E1F$ou#`MiuD3G4Q#l>R9kB2UE%GP;VMXA*}ykR&c+Y;qv@U zs)Q1@hs*#au?+eCK0m$%Q(_96ljtA^+y&2mEg`OzR+bTM7N38^!ZCUTGlx()a<~`B zA}+;GI|22litFaPR0!t#4dK&rCo8hU63^6XwzDXgLxwk4;A-w_)qX-g8e&$8HfHJB zchjvid|g`?kFRi$c_dtKx`|!-JZC(2Re|bViSm_D*njlR2il$LQRjG{fA#A?tLKT= zV9H(3Xs0>PNw569(e$(!_2QS`i9rSvh%?!q)p`S1;?}+=i!Y@jM@v)NH;MV9pNe9* zlGw*}8I%IIldRAHB@kP)-5J2@^rBldJR`d1$d?8PFoX-WTBvYA(Fl*t4F?SHuY^m@SD@P1bch|l@5KUKz# zUV{~Bv01;zS7c^B&J_^0d}o^vcja0GW-jY#5WJqs?0K%t_WUjF0QnoM(;K3i-zp=Ga+g>e@-f(d@gHfOts>qR z%H-iHeyy8V&3kFEeh3UY-4fPlR>vRFL1nE@mQtHqR$H86dwa~`$@Oo-+dct*C%Fk* zBux?GP)fs(V5+uVMq)aC&ThT}-OENOHGAQAO!Op8GH~7FB~Bnar2nNahPg*NQX55W z#mfEHnsFN2UGl6WgKJ1J5^_Ldiw1}JXc&FDYG$qjwTiMHg#CW+@+7>PN-`}EXJWa> z$bOrf>oTN=hw%2okm${gDVaD+OBMXHJ_|I*lEO#U87r;0b zP*T!yA1*`g&=u6^!zNHA>IX4{tsi?XCQ3jzb>WgeJSD;7*&?$!O78yVyjD{vbl|_VY{4DkE1zjx$kNrm4sF0oe<M-EJfd#DTg!D|f`{-Mq61e3At#{sNuPOl+YUg}iFOEKd5 z5-_f)A>3VnZO&^^&_&{xQP$_5-7i1A|LeFRll>XpcKdFA(tn=BM&;11+i6BY{29HF z<+=dC$_een9;TW}m4dgr?XHmU%WQ+s?DS5@B;v$7+^}nxi-~iWD1M2(yAYE?EEs%* zn+fQ$1LM>R3!QA{P!)Ld)YoG0NS5;+9c zu1k^&W7tT*m1->wpWV=ohGssm{rP}i`Ii%V=BbA2c0Vx4&dOMH`?82n$1$Qnt#Esw z2L3nfYmf(yBQO*atnc6XrU3Ryl%PGeTMi12_yUHKavv~A=&I9cX3>1T3y;&#kQrLN zv9L69RxeyMq@`p=U&`kPIr~(+Oqassm)-cMdcRRI8A;bD7td{H>YtFJGeJ>P$o{+M zZm-~-zw0iCSFeC5`LFt@i38RQw^eY`u$57dJCwEHpTFYWdOf!heaI~&y1x+3_xdZ1 z6i&wZ*mLI#sNxj-9!#zlr9@y;*aB44jAK2^An5+U4P;|yG01$N{vU17dUXm_2jLi- zqWI4|emafNjH8bB2bhGn3EyrC{YaVzsBu0d^6j~8|?7?ID?rXS#tDnzlNhYRG#$vBm9wqe65O3uajaRYDv z+z2O9@0|f1voIdSDR>)saDp7#sji;>-*z;A9WfhQG(SBVPlY6tdHyP3BX(|u*!tW-XjtRCQenrpy?gvXoqQ$Fd zB9$ti`RVQzT8bBAoOPM@TQP+cROp~vrJZ4NgenThd#ayQaJ?1-hA9ioA4 zkku~C3VyIY%%_d!=|ON_o@2<}C&aFYB+t<9b^=#FxO|$Ls29dHKGatIOZ^r*o^)l% zD@o#x^x^8p@eR)FHL;AUUsm$4Ut~`_lL4p zy+SD3B-ws=M4}PITSTAjbF5%fWNXm!8>{E?=l+kLHrme~1qmO`pJY;rg&Zb8oYY%U zq8~GRr$zIk;oTbCXkYAvmSaMMT=!9rzi3K&u$zbDVf#)>ExtX;->wMMn$~{k{_AZ9 zDF3Tf!J+-aF|N`Ka5P)c{yYVh7L7I8fDq(~44e5{Wau7}j7P#_b=9msp4Y*W?mmsA zHSzCM^63HZrrp&cFH7`Yqs;wGoY}!-=WVHbQnE^O+Sy4=`|@Ac4EOKRFqH+AG{NER z2OhEqRc~dA(lBgHJE~-BwqkQWb3xlWu~L9k_0&34U@hzW37_5m$)5v7E{=!-lYsuu z)36Yf=D<62K5k=yTlc@ljy*oEPSO7AdorI{!_l;I9{aqgdm&XDel-6&S%>`;bIxr_ zWo0obFP-`=$SbYe ziO=P6-0ZrS`C((mpkN7$j2ye=Lk?`ar6P65F66(s6r(-I6+1qA+tNGmqrnf-MJuP& z!>1Y!1(h(y>mdKMFZ{Bo*q3Y8^!_W$vj|`XKfmMsi01HOMw@;R@er&~ma}S2(An#O z)W56*ou)9?xA!B^^GzHG-E0i)VlSfgBhCG?#wat~*5^Km>d#&@bl3RvJD6?SmPx{N 
z4y=V2G*-U1=yC4F)@G>C^oC@1nBgo?!$La1G1YG0@-J<_Vo&e(JX%N!rX{&+;|Mr- z_{(KnL%);A#kqamdKK&%F{|6)$q6v%s05t?jxWd-!)umwSDn8JjD-*(wtu!X6ip)i z3)+8B<`=dFA5wPj>>dOW#lx6NRxPadxLZGwN}Ooq2^F~*#z6b*nd0XCQ@oyZsNL?M zT_t(u6~_mnn$ociL!;G?!VNShiym*VQA>h?{=MU0ALsKVMw@B54^^SgZMUHY(uXy9 zK)=)?9)_S5y`3?&cXUvzpHe@vi|kOr0u$}8dtmFsDuvzG z&1FMg-)~N!Kujaj%f9XLY5tJh%8(|a)FU^#(1SBvQ5Yba3=Fzx+gtz%a(lanNDyhcy8QOE zIw=O;_Mbnw&5Qdrt_JecmCQ559)Ez0xrE95Sm+jv3A^Ci*@jj;FchqEC!V!D;dK3A zwDtd40H|x9(_T&@lrij=vZ=Sb3NvfOD5dZEYG~StPhq`Ieg4R2iB8UtJf9+l;S;mmYddYDRhFOTx;WHehz7+qrrMJ#>>5>D;z(ERx zQ2sWgQ*gk|br>;x`fDF%#Y1?E-ErjvTa)h2yO94}Le`Imc4}Z76)!iCmSk@yVc@~< z#HWwhXe?E}t&VX%_wI;Zi!6`{S1|viWvt2tjNhckKC{h*loU*9KW{{Vc;CgWH~-ho z0vd@%UVUhLIHOiOm^8ahE+xPPi^$t8^2YB?!@hA}hvJN$e zTauN%n$1}I#yJ@7JR7vsC-0kqUq?p3Dq(?VzX<0R!11PF_O35hMJ~OQ29?EYLUBc| z)rW~>L4*jE7mME7(GC}*Tb2V)JUlD8-G!CxMlSnFaS*w~0-(~wN$jv>^3EkBIIJ-VZW>xlk^{vOJ1~j|uroaC zaQkD{<(vV?z5wIPU7rclJJ2oX!|QO-E9b`;-}kIF7D2G+JsJ7X@9Y1>5IH7(b5CZ` z_;*q~eL_MKFQqnTpw;A9)`m;YJgrk?l4Lc z)qKS%f`-J1TUsE8PdoniT<|VsB=oUVnBf7U&x`g6u!QnkhwdY7{#Wcst~UtH4AWYx zRtPVl|M=Q`n%s{V7;>2b^fzZwUxZ$EdnlUVk` zKXLqaV{G+538U8XO1~F1WxcG_3~tv{kcs2`?N|*TNxCdEgwwQi43fr%L z&{wEZ$4#%%rG6xIe ze0w>9JB1( zLDk-W7W&luf72E2#_%eiz9|F$o4b4O`3Qq>lB^XsYyh@lZrxW56YhC}qnd%yse=1^ z&6D1e-dKqi?TYNwek47!LxkbW_+`S^2O9E@+-$d=n$+Isg>M&*%-Yi~1ZXOo1!(1H z7R5{Cm(5+V8#*VvFn;wx!eYk`61EpV&vf$Y&h?|QXu zwr+!y5UqB!ck?5O?7}jeZnAxi2N1#jkiSq*hLI+aT1a0vwtZj#VOSb8Ri17I|bOstXgpTwLf!sRW@JDJpMTfPbQoWWjcB$m_>c?PmK<1g|Y-2{&+h-W$erc z6zm{%c+xqrj(f&LQ^l!_WG&iQf{o?DyPo(T!S9>4=o9IxEbjbA&gh7@8y;TXnM5ee z*A&r@I{%4JWL+zrKK`n-*$mTB;UoPCqZzH-Hq~ZC+P?hOtRP9k*}N>qGJ0K@?uRk4BJ#;zn8zMhZ}%`*005BVCT%gVSh}^C{ocLH&`e zVqgJmp{*9Os>y(QG{vuO=5cdR1m>(-=vHvXaG_z6sgiqZ@Zd#RwUrH<$gQJ+7Q^1O z&99)ilvCx{=L#`!<93ELasWUxpzM|QU0T~e{t(1r4e}pwv4$W<&#*=6*f;u(KO+;n zvig*aQ>ddJO_Y?W@I3-V4MpNo6%IKfX3=i+&${^+{-|b;s(w_f8)go7Xr6N2h0Uk6 zSWYgR_sjeIGc&`vb>O5iN9)Q8N-(2V0{@XPBzQ?TGc zL~oJXeeu~A&UVsX_8W9VUbptU1i0oEYowbloGlIS(_|qAB$p=5if_9e?8_o)JzQLX z8ssh9Z#v(`a^V2qi`^UDZQ!~TST5LrB6x{aq<#!mHtj~Fe@7ZSvk%Q8KHQKaszksh z5M|RduywNCwu#sT!!m153@a1J-ot%zsFV*BcxH(Y+$O*4vCu)%I(TDAb)qC^=L85X z!_b=CtY`mSW6$|(5ucWZ1x&RLfcXQ~v2fxcZUtYhplG6;5Q7+nSD>xe^l%a<*b%rO zzWxN8Nva{=4oflRC*7bE`EPBwtONCykq{9Q&n#>SK|Tg*>NgdJCwSI?YCr36fp$w` z?e=h}ZDlQ0lICMi28XNHsIS(Jec%yy$Zm$2IoMNNEGV`->_%LQ838*e6mv2DT2-%fmNN>ZlZqOz+^AjI=)R zzuN}f)Si5sI3C{CX1tz(WqWtHwLy;XfEFCg;Vf`PuUzOpiKB~n*5UlqO6V))gV+#Q za`A`tvd2`!weS5QjJ~`dbZo`tmv3n3f4-tpmGNMa9UrujzG5^xWZa7wKUdC0@BcFq zm^LX7er;p<9QT{G)K~m-C+?-W0PA=8U$;_q-W6b^PDIrjbv*k5KYWP1h-wUZ>@tB~ zzw1AmoU^QM*i@7fW{^})UjdB7`L1#kMh2Y|5dY*XlSV4h^HTKvT{`k;uZ3Xt5I~1i9-6oXK zAIXF%FkZT;{58fO+1UCwrHYy2|_yB{7>E2lab>yRWU#PVSF;<=utp<0BS z-8KwHKWmBUl;P`+R8`{3ya~Uqz>@;s-k?McI6n?Kpp^L9jHB}@)0G>dX6^i{_SZLZ z;CSr}acf?YHNXVRbNOu^;}xE3a-Oq|ultNtgjTQk~jA|S|Dox;|JnZDH{MoTZBbQ4jS4yxMNxZ%Dr#?9qiNsvo;6B@qH4EQ)QnMk#3*XU zs6AqDg4in}K@yRC^ZDWXAG{u~$2s>o_qng@d6C;P=P~Hzxm{7n;rS{^s3g(x8Vcfh z#2}s$&^>;l$!Y`%p?diN&_@v?S2XRaTM(vcwC?&ayYT z7ECuDLCk@Vd%1r1d5XW%{*YW}9c=7lF8J_;#)zmjr1&!cq&mMz3V*Ee06^;#BS!<7 zhHoNeCVYL4U(NSDVLsp!hJRlKRPKP?Quhi&)HSJg)BVc z45o$5pPtM~J4*vVI zn|DmRU+*+X&o9B`DW{eaf?O36u&ZO2 zA2=Ls3mg_0{G67t(cJz9?sv61IP=$fTt>LCjPp8Jddi7!x$-^5`*Z7WhTI>wpX$zC zMqbfNo2g+j>(btM+!mo zF`VY%`lJOEuNRcAuq|;kVXn`8BwOmI?N7`aO7}5YZAy4M0`cbJfNnv z^b#1sO^~-DeGNNSK?)o|k#BkWt8^9*mbw#{X({#>SKP@ZznL-bO^GtPAA^S`d(@de zUr@(RM^n%zO!Tv z2`?CZ{Q|wcb4S_059D8L4cEH9zSC(t95o0EFHn+z$m$Vaa0S%k z`)^U@8U2By{KM4AEv;bY10tgX19%d-z^Dpc11voOZ__^*Sh_Urv=fPWt1C?@J*LX- zCv#BpR^tuZ56RuafhwXT#e;;b)Z0Nq!KOPM(r(LVz(#p^%TeP5X6T|S2MaBso<>`8yyRu|r{)(K)h 
z&iB@RS6t0`Vc2g>z1yDry!|$y6r;969Xs39uQ_QzUsVy-B;_e`Z|=vnoOD&c4%rSL z|DZDSKtdsrRATCy6tAnFU3t0h+!Fm}%~Ok`PdD_{#ISs0J!=sIksYq{k1y~SKI!Gq zLWh5EtJNZ3y@Py+&N=|#WgiWB1Ej6dTW-cIE`;oY5#;seWn7oO;%Q+jn!Fl|#FIQi zh~_|22?fi&XUUO%bkdgPL4pPiCQ{PQNPJHq=mrP@D+(6Ot)6sPgJzEqFWHcl65y*K zq6UCSPhBpcMxi+|$}!-e(c;aH>I2TjF(I{o7LQBGhawcJ)2D6@)+T;8WD)K8r+-^ItcMt4$}TiG6m)eo%|GT3l|H#)$@x1j)VfCxq)y zsl?5^&9TJ4=<(PRHJ}b;>(aJ;{q$BWc4?KuS(``FwpMJ0ehlAwRn8OaimGIr zed}N`asq)2FX>^^dGMk09V@9yPp)ZwU{|sREI&XEK6P!=`D*|C?^cG3s|nyI;4aW) zil>wXLLBcw4_hKyQ1Nga(r5>ZpcI|GBi@}FJYT7W>_zhf2>qZ>`&ocsyc@dNYcKa4 z`DteXN*;z?^iV#bW1;b7d{=&1Lyx3Y6_3e4>^}sF)*u8!!2{5(MIn2vuSZ}tjQfywZ4Rvw>|$w?eX;FIdp<&4{3H4#9yAFMpTI;`A*2>32^+YtOQPATBg znD2eKIwu?hQ#vG?-x;B*AE|{KZ~<=D{m9c7NY)(^s=%(OK;v$72W_BI&pkSC6#5g~ z&b`|EJlay9Tz4{|B#<=08mTP0hizxfH$VzbOsduGJ$lQGzYH*;=Z%9T`UHFrBSgxG zkOtfKt30g`ZDYSedM;^IrO&|O`9z3(+droE(bMp?#11&74Me0vT(BT|)>mUSvcbpJ z@S`xwzOa4>1xcuYUsJ|3G^`1Jt&%m-Bsv`tEpNRzPG=(+kBh|6??%a@QR0{2T`54BvIWj1>Yfi6jfjM%J%xLx(c z=KFK0;SitpfIs=(YA@1)PsNIO1JcdTkee3!a07hi<5TxARko)4+ zi*yr@_pb00m~%4uSpQMV6Ka)ekRNr$ztqCrfUsaPJ-1b#sWco_sK~xE^7{Cc9LZ=d zqxSQQM_Kw8E1UpD(CwP#79(^Vcsj;KKd&27W~0}7jB$}~e*~P3qS|s^OAHx2*55_t ziX0Mc$uVphV_&8Gx!ileJ@FsQ)(Av%VR}?tYFo(QC0=xkeI>-hT;UtrUA7iNbxnD= zd|)Zxmn7fe&$tGD*@*}bgLPNCYeI%J+EbcCj}@K1HklQ$^Eu4OESGx_hBw;IWJvF`mUQtIzjq2OEm!62(>`QwVqAz@m0nh!qj zxCf-amNQ&RBY;}SJ4KXy3OTn3S*gO&NWdp00?#}YiP=!XIOQ0*NFedYz$;+5!2M`x z5((5CKyn%o%?{pN-U3>4ytr@*-u9tfCs_mc_mbG69D^hE`&Xa|QQ2W`NX4(I-hv*c zHN`luIp^!L+dwSzWQTmsdlh*t1Ru-Oww3|`n~AmFGtz}j!M<8_C-%HWb=|A}dMk)$ zD;W|?xlQH*8eM)u&`3QzLSu|QHUzVt%c`;BpmlUXqa z$4c#ZhOd>H94cc2h*T6CXL?~?o95^4k7We4nCM2A)m|zy*B^hTK&f1t=osx0t*(it z$c9iMOjipGIq&eMU;<@BzCd*W9c(_~9_*&?2za;fTX)yySYhW(HNjU zRn^yc;y{^1P$JJLcGT3oV+{&AkupGs#G89Sc3UaLAP9ykR66fPp5#Kvv~Vg-l2p&) z);FKV$Vq_)9RdTF9(t_McavuvV-mOXKU)anN<_7kO7wFonn&UIsHD5}7uBlJr*xno zz`U6UNtJKEuI5hj3Xr_bNnSgr96P)tT_8!5NZQAH%;juk>s1&Ic7iBJl*1pLop|L& z*=0Qr5hn~lB4K2s_^r@RbP~jWjd|e+vQ}Lbau`J(Z~&1m7$L{zMR>3N01Idt6|w`v z&2@v0Qa^0aW*>-S9+oJWYPOUlt;xmv!9=x|-+Y)o)^7x-K}33sn%qhb1IgW-(M%U> zuQ540K5b;RIN*~Rct}pPiZM(^tX=%7wWs}~QGJgb@unLzQReaEam#Xi4S`XaApNZf z4dG95Y5Oyp##v@&l5>U}q?$aY1)nea;rFK){YE>#-Zb^R?zjZ>MoN6Rf2aJ)poV~p zLnlld@v4SFo9`@_5xkaAv&~nx%_tuCluqPrI#d%%H1NRXYFWpb_I~zY)B^Bi9;J}f z^eUM`%~+Hf@O4oFs#L$DKE<0GAPKHwfh(8rP*mOgmSkcf!(XK&kGkuVLO#ZN;slY0 zhS#|#GJ0Mry<8BZ+$Pdp(dyE-zSRkcz+>Bi#(P%)v5MgJOyTNpewNWj>mIrK+eJb3 zb0?T%0j7IO(`)p$u@DoM_`**lT6unPQ4fN=-G*7+u!A+rCEm0sZ&hAyQ+F@ZK?h3o zkE?O-1HE-qsV4cqN@_{%uQU6h`71NUzf-_Hfzt_`*g!W=VXvCj!E>GBf7ReZ@7ph( zI}s*t8H!v^F0ojbzR1}~pQv{|Zl`Q^=__0SDDhFt_>}WAbnpp)bUX;498qH{02jHy zGa{70=%H|o-y#i11f~F=P&{ZW!{YmO|)8dH9+KX9u#!Wc{6G-KVnN8I6kY zoT6jBg4c7BYX-^+oo(m7*#x zt2w;>-&SUB=Wd366(gw<*xuq+aU7PV$2jMeHFx6Cq#{6A?Xdx#&@r|iT!1KbptiF* z*q573`P3j3iMHiKQk0T-N^8+*uY;>qOH9{IRb{Z%cUGleC9kUKwV}_@WmmucMYAM= zEd1^XgpqHLSSu7{EFes${C1l1isjDvTd59{2pI z|Lz%P1Pc?xQK_Pj88A-jJy|wWX9(LH(f9^@QXw;anK(={13O0N_FeMBc(!45R{XJ25#b5 z10{oU9Ep?yrvWz*5dY7>i;%O7kZu0u3*IDMqUzhrGq3KL%vYD(+7WW*n*y);duSPBu-FDpx&VU4t19vh9nHa^yBF~YW>rl)M3oQie`lj8l#CT@<1H^p$GoCf0kUxjxql9Pw!SaB@VP} zPNKG#N~C-@0~E5e!7_6Lw%4_7`0b9V-3=d-^&4PWmf`Id`~ctBpZMu$NMi3v)@_4^ z@UCa0*+6Hk`QSA{!31@`Ux7ihGPVYzi)|)ujX1P>w1#Oq5MgH`d#(Pxid0p{kg7;6WCsQ@al;e zUg6}jW?<*jlLy$;Rp~F02%F8s2-~dhW*7$;Q64YUZb$4>F{w{hav{H=CryS1;VmWm zCE`S;G&F6k6YHdRY%oTYnn{Dw6Q%rING^}4{J6G0_kBPp3srY^09bPX0Vb~4>Jd}T zYBc}iJV*`ro6Ty$+#QKh&VN>*1_dwhnS7%U6c?2Zp6VUlutd%ZR&L#Q9|5vDzJzQP zmrq7kfihsox%(~Adtn~9X4bWi4~j?D1er6~QJv?i*H!4I)eYG;NAUn!vcWwdr@EhK zCj>_WXu6oRY`++wkfqx$Ea4U@qL5R6k#}c9C?Kgeu?W(uv&I7-I=NFfw 
zFacAEY;$px7{qpBM(wTHb=rr~tc3)%EUi1JcOUS>BKk3RTnCv~Y}vIK(CJDHKgD&( zH6x({&z_p0Z&Z;nTS)iMEd><7Q-!&Tt@eh z-kiqKs0^Byg-ZyeGdVQxD63nB?x@o|p?#v{SOZ1h-C z_lGS})C#3N1Bm~ps{i9XWaA-Z!h<6DdIf@xrSdo%D65dDeg`T>7jlX@?BvAZnng4oc4Jtn<`-BLu&_UTQvOj-y+6F&bR`l zw9(spTiUqGnMicdT7|RDR&?lil$1Q>G5H@Ld=SspCefu%31jSA2N6`|sP-n9!RlwB z&t5y(OT;3yp^mc|4<$79}b*>G|OJNuLQabew$$p;wy;AKg2 zEAq0u5mZPm3YwRQFY3DQDb@WgG~;x4;^)VP(t}OJ1YVH(K5TgBW^`;}FfNgwm3YT6 zh-jX3xoRAKemBk=P^UJXd_koWSl<%1f7BQL^?AFjA-v@Mw?D&!Y2FI;_gN_JEt`qm z?Fdga#>Q0Ll1GZ9vO2^a@+R`Gq&Wslg(G+f{T2Ks=Z*NnIB+a=i>?)rDX*B#LwWyC zH5<=8N~cleC;01S)DwwcJJ(fAvuw;~tF;ajZ3OE6wK!T2Szl`i=SrXV@t=CeYbEjf zmu#WoNB+1JlAG{@4#nlj@zNs(l?>EuFbyR@_S%)SPuSDzcVZw~cDK~8!AwP_qV%w> z^pPHa?3ug!BUp7n{U3fhss*(gXWz#Qfv-aZr&!9mDp8_%V^X|OXO^}?lqTAS zwi3hRRx*RkmQlbF%xx~(Xt-JcP*EyP7d$A9?_t4b--^4}`PVvE3h_JaxDZpa)%96x zHdQGS`r~uww?TPKV~S4wsAnYFzlli0O;Wv*VG!T)QW7lp;xCJgOjY5$UVo_3wWChp zjh3e!I=vdn2Vj!+3807+>Xe!e(KIc2y){x18LRhFp1!$1o`_AJU>>r0{B_;IbR%@EK!aGl~fhZ7~|T+Rf@cJRGiwT{Z) z|EUIk)^*yN?ly#$J!nSqws9CVw+k zdbgt=<1q`px9(d*Z+wg-Tr;9F)29T1oZ^K+8(XXBG1hAZi? z_kE%i4;@M@wUZG_H|(pk^1Rw;A zYINJ^AIp#8o16H$B+5AM;a;#OY4rnFJrIk1sV#YHr1SbB567pvw9`RVA|B?lo$f2@PMTc+UY$>c6BN+t0^`;P=TOo(&^#zBXI{s6 zNMqklHUOoHrkp`TNWv|Hn`m+^{A4EkB4b+(Exu~zAptys3F4zUwE^>SC|9kHJQ$<& zA!Sxx3D?(6l~vTjM`&RoM{m#1N4vY=m}*eqYJq-#+T7-Je3U3Rk?a7$&&{-=(q>wd zG_iEZPvk*R5SbZ7%JOI-nOb)fbB}~%eqDGh1#f=z0QXJ+@rBf21%^Zg>`5w|7)cc( z`GC%c9N_ro_nk6g0TgS5!rA6b>mJyHEbvtEe0>2zRD^uL9^Hbx=+tN*}TQN%r(vf9$Fv2)71W9dA_xa&RqS+9P4YCn)tPi0esp-#FOMh4ZI2^va`V# z@<{R|W6REo$HWPee9Q*NkhVZXToD`>bb6}|gKma~0PWEbEVQusH6{s7m>Z|$Ln(c+ z$lwm;`XK3LOwSp)y%s|3*MRaqfqy)+gi>~UA!sa|uqNH6EcAEx4e9)e&57qrarI14 zFaD#!3rgObM1ZxE+z~w^>94X@NWfH&M@i9DDV^V0Bfx&yV`;ul8L}M=AM{z6)*_dx z0CkJDC>ca;LQq#5j{ogc!n^1NYH_iuo^t>FU>li%YlSKGaB8k5;)pY7k#_x%(#XCo z`iC+FWA>n=8aleC9vTUkDA8dv%yh?UXDiN1bI)4*JZ)6vR@%H+jD) zhjHoF2i*Q0_3#%wJ}^}5efxc8-P`I{!@eo2J3an6s$Re_pJ~s(m6kzt}H0?z}c7?6|w^!B%A zSML-1+WOQE1Yw1AsgKt|;2s|%LTRoprNig9&)|$ng#s|Sj+yvBYnXXHlneH%?n<$Ev1@8(gX%^v-L@aMFA>*4sy7t3Fd8Yu=I!zDWU+FcnQs7k_3 z;<~wlkTp`F#>J)tl{0_+a#A?jL#R=xdZNLm?90& zjOxfy-~%fLr$;)ckgccSbrf$~M9nepo$I%L-1A;c16lJnXxF1Pl+=$ zX5ns#u2@JnW{Cp+nITA)`DN%C?9t3$oKTIYQDN#W*om6=Tw7=Fg?u7?1b@c3jN`p^ z&eLNB-u=@3-MRxqKPNA62ltR1K9MAytN=-mju3w-v=@VunFExmy+21g#Hb5xr$Pu$ zC8m*to`&!W-oG#>nG8#LoPJa8<&RWWm98CAEwbJBjr^$a*z1eBb)tlVA)_tH&QaSQ zrilGwiXEYycwfR(-QClNx_RMDmQXT!P2aJRF)`@Y|AQlwv_ky6YK!)4tHYe;BQKM<8VZ=JPyObJ8!BOgY&*MvgOxX zbk{$#KtjLm_>Xk*VE6F(;fDxwX8eS(Het|l{>N5zfm*3cuI$2XMO>0FbPurmT{|Ud z>si~{df=9ef5Etc1a@Fz`L)OzFnl#U;Il}^Mx`v4Q9m0J7L&0xxW#6L%S?JlNE(JN zd9Rpj6+IW^>Y@k~lwnxFnSNGI031&6g?P<#l=-aoY6rZdWRbz0KkG zZE*`ir;XMZ z4|nG^go^&RPxYUaoYh1Ic!9aTKE27Z!_Mq1ND?FS+A!=6*p;yg$jz!B)@?M-4V&{KJCjaXIkecGt}j*D~9V%w8n z-oFx*uj9>Lx3si`2k=1?5S5{D+)sdWsjW}_YsmO(FG}vuTkk_Km)OsDmiDj5I;wEa z+Hcy3REm9Xbs9GHWxX`lXZP`&?QZ|`fl%^XchMg!O0J1+)Ft4DViKKzr67SJVJ

FlS;+@y5z>xdohiKs8zkG>H?(_)DQjQyrAA}&q)`bl$KZkI}f z*VHoE1WveiTQD$ASO7n}`fVi(g{q_pw{<;91XaNFVBK}0E_ZRbPLlEr^fpHD-QHaeEGLBZa(ds#mcHKwIU9Kqz~9rf{6 zqY`Us3~=yLoLG6?iv!05@0sWTR`IQ&f0 zkbUw3G&ROruX_uWDSxuyLQY}Hk9Rup55w?D zI;=n5+VEReZ)@7uiQ6LNFwA440H(>n&iz9QH$)*sj@hGu11ZLF zm53zUUsEusf9~=ZT-1v!rsUD-34V0-0?+;fwxUWX&g@vo!XGyz)2@fgg1^y|r(2QX zCPS-=YYn^bykfnr2VYqnh5E@6ed!~OPBH3#K39=mWMiQ$BLO7YZH@M3)UpAM8;LcVkh;{X^;9sXlsJ<&Vwx(er*A zB_1EH!#^UI!t<9h=VW~_O*(Qub0skF`U;pPAG|!MQd;5sCAhb}ZMrFN5mXzzxief> zD@Qw@K))a2xm9t|rKsdF7Jiw)ujRZxRfg=z`j58Bf4KqWFy1|7rjv>zcifX0)sWr) zl7}HamH6nQ$x7GtC&uvnvCDFr-jPxsgRxDmW+yC^$p*42jRmltA5w0lj z_@Dui*|I_kt~{NWg#5D>+-pGEI@-6H9G|cSzjsF&;r(xOh19qiZ>_901!KqAix6(_ z>b(!=8|usPI0&i#wbwU4MZfjjj?VTNS(n{1i!U0q0C}8H@rTK9Yk8-d9*?=jtbSC1 z`*&6`1zwr9=U@|`jm2cqfaQKz{$%@Wq)ilh&!|;cXsg_*BI~RtmYhvrENR7E7x+8U z_l2j!7>iWA!+LcZ4fmrh!zJ(EuPG7I@z$Xp^5y5C-LeeF`OKcjgZ7mns9Flb!5!j| zD-$QcEl*rA_^=W&=OATv%sXg$^oRiwm)sh@O_7uq&i<|us^T_L4SjbUMr9MFQ?L+8S zPU-OLh73M;>y;GAe6MwQHMdLoe*LNG#sTC*Ua0hzOMpo(UYF*E26s}SLYE_PnwPe0m-RR(Hr8G5KbqU!+IMme{;l%o=sjc-WJ?*q$@X}A^eepmuy1P<&&>iiFg2dY-;TYnocYO zYOriZ9VQnh?x0EWNuduUsodB^j5+o{vcy|}ptu9}!^^z{z^Bx!>3tA-WV#)F57!ki zUOJibO#@$;m^`WR59*9ZNB#T`F0lv?bEsuqcv@Vz5b%`kiA0b|!5!BrKRRnQP02<= zR)E~n@fuujW->Xp`@1z`9M>lWLlF~L(=!Pl&8a*I=aI%rN&>9(o4LtTgFmOZEpuH(GzgMGO2uHHf>C{w$dh&BsiJ%40~= zdrCPi8^=cp&2&-qsj>NKZwH;tt<%dZUD}8+;;zFf@*JJ(bRRY@@eX_QRk(WfdHd+d z`M?q_^VD#lyL@_c4Kaew+vCkgTrEtyc-gi9RtxD$l#q-kB(tqP?g|RAK9r51{Jh>= zVt5b^m;6AFYz=#D$KYTw6mkaLb!h#I8E-JIR2Y@V{d=%6LbKB}y&r@1u86g7IfZVl z=iF0wYHSyHOy8khqYPM2z!|Dbr=gN$A>#zy84|;vUu$s+)oqjYttQ;V!&YV=$D2${ zt+BD>nuZUX?TSd6hRK@!QRIZS5zEVXBW7=>BSAa6$q`4O=+?=s=uQA9^?_FbDfoJzCl zG5apu`L^(j_u%5Tk|Qvid^AFC?d}CnGp+#`QX?mzbOW)L!B!R5nObr+=i`T4Mj3{r z%O(rYM)ZQFuHkfpGq@o(#GL3s$wI2@c@xviCg40>+vnb?mk7t(&%&=ze$J%PlF#l# z)bt;wt`*5%Dcl71OkVzDwhh`9N$D{5<)vo&h=^a6HGP%PC7faQ^1}b5Q=45@=0D2# z5@ZTE&p#t*9$+>&I1;F@WDTAtIZ(?}MX@SHIOPQ~M_(#!VF81e2H?+EzpIGzOWe8> zS)=Zd(qx5#`BT<5g`o{1836%kQI9ugUbjnOme`_)-JK2G_|YUc%f9QFr31@K(D{gt zxPMP)?BNo|cnhj)hZWHKWFGK%VyXzPsD>LWA*>I5QR9Cyo!xpA2%N?5@ZVOt5vuv$ za%OpE^Zqp-&Xg3v0;ns|RiVY{RcMrjuUW$P!eWvx~;ju{V-Cfa^dg! 
zY1>P{-1{oO9lOlbT&IlFCBnoo<(B0})Z5(*HYmO(*6f{mhDS@lhGc{LgD}7dXcj74xV2Aw= zQuVQ51Mu{K4Hj}<8o0$Ly75aqje-|o5C0*teMr?HFR~tBfCUD3w^r=a8X2@_mvttj zz-v@sh{n0bzweK+FnQ6r{*QHi+>6FLY+Vz@?2l|VDc0jFH zL|-a8JPWw}kv`!Rf;oh9jLXW`|7c#22Gwq6HTtqB5kd*StHs`>@E$`rpN(FZD@@GS zmSw7EoRc2cV1CaovJ2`cn7DyFVG3ofdqWYp*5@GuVO3vLiJ!cOH%mM&rTmC4o&E7r z=`^MThmXZi^uoQZPmq+e1W2Qcx&!}|f;_sET=>oz;TCn_F^fKNqw{IX>3g?ZrNA?vu2k2%R&njy9b|lmEc=yj zg13(@xLztJ48%*lPY|8AZKz#Q;dY#Be_&Epm9L@`Bz#jDuRp30sqi$C$P+D7955O} z%ao26nj_TaTK7OneB~GF8jG_6xjgRS3AVlWLd27@dSYNX^g+6i*IL&*ip$N?bpDdF zXFizbYXWRa#RoJ$#N6!blX=+H5G!2o)_j%axfi(xc2^9Jp|!oDbgl0p`M(^FpGZNj z2_z{>AtfH1V$PZO_`mGC$ABWnkUP786t-0xLeaOJxSe4j&AB?!(((g4HT)_5rqJz4 z*v0fc*L>_txxgISRwwuWf^d-(34wcFztXuY88u5@um$ubf!gFufk}R6$n!d~JX{_8 zlcNdUlR|A+{6+@m>t;@oC#JGFrO(V`x}3c~f}N@wP?qSY-17{(1{*yC$sRCku5}-boL2jR+2m{4tN16e7x%B3E<*bL%N~?!#A!Pj z{er&CF}iO!W+IpbV_$-^9Gh@#L(6|lH1-T4_1xW=c8ROVdwy`z9VV7R6GPAefPvzIAET+FlAd{-n;x9b{Ad!}v@M2cAk z1vAY%g`|AM@*95~_R(wkr9;yIsrC$ra?H5L1d+VCN)LGIs`nY2DsHP07X0yo0JB4E zk1Z)8&ufkRn{8LoBcjqDJ||6&ne7-0cn{LMceC1Q=05Mc_JTU0dE%`NSPm3^Z~x=p zt%DW&%$~vhNrZvJ3?s$Q_skxP_-DX320=Dt=*ANr;319^6&qoP$FCvCVam@^7q7ju zB~15PuG@dR-j4(c1!YNf63T}cQ_|NDrzhE8cAZgj2zAgQg#7oQ1R|O!O(<>(|CH?N z;2QR=Y?R}{qrTJ7c6Rb~sWQM_+QvA0HUEo+azEuKS!g$bQD2}e{ne~fY}EnOnG~~D z+oIQ@?`9lakMF9#YAnrMWB$_kr1{fSF2;};f&bEz9gT|3(S`C9mE2mv;mh9fo*3wn znRoI>n0g=$P)@_sy8Ceh6DU}J!F|4K!#Aw~TpzV4CVI)dY}%uWPw$`UAKbVAz0KD^ z{yz)g-Cir-J_s?1r=t_RYsbVeR*_0j{QEE4kyO`CBM>xJeQ1x%DL(qmbeWBhyNqc! zi5jt06e%p!bUndS`c`}$IU|5{MDyT+*3x;aesb7;J-7d=X!cTvHScbC^uIFz`QOp@ zH@VurZTr*z8XYke;?L*7*lF1FzkPtDMOCk#AhoPU&j+Li?v}m&`r#4iJV5rSOxwIp za%RM_GK-H zbn7HUtxp6we9#F1!0!Bm70dIVz3-m2-x94jCPdSI=r7^NTo%^^NYY7iCME#rATKQu{#mH0*WwQdb&+V9$JnR$j6u~GJVn4TUTvl`PE8m36 zG>HN5$Skd};P2VgQR_jN@0031w5y2X4$C%j46+i%2C(sOY?#g$eY=s&7IgnKM$Ml}|c75GW{ z`Ga{}MEwj<`50;6(ANOhD?PJ#p{{xS8gbONM-*$m2j9i=A&zxUKpqhWPFfM}W>uwy zy`VtZ-LjeV)B3`Q9TX^Dl=a%tU>{_S$dhJE_hTJYes@!n$+&9#_L=fW?SqYlpSMXQ z_zp(YCbcP#@ohTN(Qx)A*=1ra&OSfl&4vj_Y+cercgv~E#YDJJ?zlz6unj8cHq9di zE@lu*TKgAtWCZMpzN{ag(w&iK%H&7-+Pv{x{_T&?Jsusv;Apb43+u0x58U6lBKk5`^mGR0M zAfgFIm=%%U-b$K1_#W9m%AK8%jwcR~0S+^XpjP{b$ zKBReg*IsabunKz)?inX z2eR`m&-^_clDC$c$ij8CrN?zg@SugW$2K~EXJ78^L0i6)yN6{YoHuVqsF8GET1KN5 z4vXqxd%)%p0RrR6!E#rHqS!@;=+7^J`3ryNuP_`R+KhhQ?-54wA|n!dg;JYY^hKbyex^osF*#xqBkDWXUk zUoyYKMC2671CP8e>~0J(bq@%vNoWa1kzK@68Bvs9RB`9is{?mzPP4v>irTD*s_@Y(SkGDvdw0|*?uad_wD2X*Fd<>sa-4m0U{j;OrcQ;$OhJ9{pkMUBbyiCp^a5A+>+9P;((Jg$#7dyKkFAIs!W(<;vQUu9v6 zT6x=Ai>EPe?QEJ4okv(EqONp4f4>^rTrii8YF4^CU~w0dxCgu0Le}K}iBonb2#bkd zJwN!Q_gnuc=GLElUX>)?M_?|b@B8+T7Ys<;+0bR1UMj$gZMi-I#S(ASJ2ew<(Ted! z+pH!|Xf$>s`^rA&q-_FGICdDklUDNUZ_NWTb1iHK{gWnm!}z+oR#4>hRf_Gc`&j4= z5xG_xZ z*M(yq;1*UjH(_b|t8HFeqxd9x)^pV5tzdFTl!s|UbyH|LuHICgm}Ghq*_xrH_^A`F z_ouAcG)BW;IYoi=rch>-P9+2kP4l4wgCEMPpDOJ>Ftnd~+Wj_WtV0)A2|POtp+M5 zk9EmwoGEqQJ_DHQlO8)Zj@{ER+Lp5ouQ|nZPdiC)v3^n8=c(8s63v`El3I~*4n#LW^s4LC6ykX98h)C z^TrGNk`vyt>=+!^OWDDgfB8nydJ{rZ%2N9yUVdC@oXU$Ig}#vfPQQ12T=7Eq%1^d* za2+3`f!-PAFS(|QTmscigfrt(gD9zS@E9h0wU@%ci0_2*iR)P-d0#B=oW1WR!q)P2 z-1r%6t^A%}+HDL>i9WlE6(&aq4r`kay<0L;EXwth68*RA zllN%%-Xf3r|GbQuG_hOx;B(=5BWUn{gh72+^F4VC!E#4xk*)Ile^)mP~ zuZW@SVzYMDzN)_{;+>qlkMPj5nHv9R=|5_U)NMRO5r^S#OAG8%6nIT#7i(i#glts* z{2u_!Kr_E+vm<$^b2pHHH`UHM(ETLq0Q0GOMcw?zNUx*Wdj!^ndji)dfe!UoQW_ONB+&{hwGW`)V3s82+II zH>{V{{9H)}_FK-Mt*~kpq~ohAP>ms5vgtED{(84W5t|`NBCVR~Os=u{1NzA{l5qO? 
z*PUr>ekjkjo`IXWKEE%=zZvTo)|crcw8Q$I8e{#C7@+QHU52LV)R<3Ot#h-(d_hgO zu7%@-flCozi$ymA4|h)n_>RmVp}i40Rm%Mg*YFHc_!>R+JTi?Yf;TT@)yoF>c8b9H z2h1n&|K0H+7|Ijr6RKn6Cg`IkiH-Jo?RKPY#3$$zzfZ)B+&fT5BEe#(#rtKDA_eyPk}$#zLW7S!l()$ZG=#3 zCg6|rAJZqUf1E!od~r5x!PA(!j@o94;VWev{_*{D44)Cv&Sq&mRkgtAzfO2DTX3I7 z9UFbsB2Azew(TdgZacBS4&rac`F&2+$@Ye(<^|b?8N7=pmE-BW;-AUo82<49b8?DT ziv35qw}O3GRFaX6>pKJO$$WjGYoE2zS}6*Za$k>m4^Lsy&(|q||74oyVPB5S$-xRZ zz+yEzx!J#}Wg*64&{2pU!Qcp8cjTB3^dEs^QMVAXKfs zta`~=Z3+V;Q1{5U4@Q=a(R`qYWd(X zz=yWkd}fS4Y_ly3$y8|D26Yak;`q|Pk{Uh2shL&TmIa29Of_+JPQ6QLhVJ@MfsWO< zd+9K5Pwjnms%Dk48(7yO5r@&g~rt4ePP%>F7paJ)CUuUWF5CGnP?J32{ z!eLz&BZKSX{zJ=)M>L5vMtq?tgwz_Pp*x2cTBo4-k0ONHq5SNYqlkaDX%U%9Sd$c# zrF_6&6!<@K=!xvSRhicLA-*7lZ)T8y?w^Kl=$y@}s$O+odgiih%lvfRY~GBX#!Ihry4TGpEilVw^ts>hD71peq;`$6jYNN89OQS_e zf235nHABP`iNTQpXCCn{@Z(ffsDIQ(Wxc~)5-#+a6ju_Z&`1@IIhI zakRFo@$ysufIJ)b-oZ18tsMF=L@0`asI$G}`SZK4tIxQB7kGO*6-E7PPsgKQx%Yjl z5$67B(Wif;Q-qA8=`Z}|nmOd@9qpWq&lMHw9xd^<1YXHhZComi)tsYI%er*KkwU&| zkv_s^;RQx$K>y_d%#l87g~w;h%!XcwVEE9!YW3TBUy|t)#7|Y6;IS-^e)*pFh4_^! zIPmU<66dXGoGBCPLn{Y{yXXe}4! z&<-hh=`6_!7xVQk1rj>7x@y#!a;fbAmp`)vIN{Q&WCQX)>-f3i)cF zkK@PrQ$VnkGS-vz0nYU?yN$yaVLKS1F4y1jbtqyqlpgR3Rz*&`EZOX9t)^}oOFbNa zY2fi{M9j~dsg$?W%PND?$T0~&@*nh3;~mWA$COfj{#2C!4d1k~!%R|09pfK@iFiZ#2ORg3)2TysvP6*OoSZh4<Y^HblgjW( zxBY+!MlG^7@@t?qS2SJv_H>nAHsp)J2weF37FR2xuTpQj&Xqx3sdl}DSGSCgW~rdR z%ccvu@&OMx7j+VcoI0`rPxBYNE?*61sib>i)CzW6ZDeGBsh7!?>XdkDfGX1CVA&`Y zi~F|o4L*65pZhWn?kx6q@QbH(%OZYrpQ&qTy!4NWB7<9gHoA-7)vLTXSj+}gg_TAX zA^%je@r8d@y?8abgs9xX*YNHQ?O%O|+8*jJQdBz7j;BZsp9zt~jET20m6N6JLzD5n zvP!WCFek$7SLms9(;UUO6iseJ84Tn4MD{KB>E1_GXbAH^PABn;xn7+QawC8#`%?``qR!I|kRu=Pm$GA4caH#0P0)^H z^;yqvdd>~}c1dac$IGnO3xnMVqr95Pl44jUiLC=cg|tK$jm`f73HTd6n=A}wk)$Ai z!%oLHvkM_ea-UL-stXFj7X+IkV(`kWT2!T0^l3Yk!^ST%3qK&Q@kKo=ETx?xzE!Md z>7Ug#IsRC>_NfE%Wn~qp#LyNc3zRhBI6TB8p=B`sfgg}{di=#+@+m!)N&>k#rpMp^ zdE>9{6R|k1?yRn1{=kv8_gRY`-hb7r=uPThimCD%C2)m7X$k}02kCN^#I>v6zSz#o z>~7!6<7qnw4t5W94Hf#YI_=G;F1Sz`1GlqPi{C@T+#X0rwJJTHfT+-8J)ieNm35y5 zNy;TApm_XI6XN`hszscj9^Q*I!sAGC*&5#B6z9+Tq{za%W~iiC9_58KoYYx&;!CPC zH9*3Lmr_%qjV~IzwNw)4EWL73>&XKn-S_VJi5=oel!^Hv0N;|d6#J7h8Q!!U$TpvIpd^IIiE|!OBcP5b)Od_fCQ${J5%8r%?B7M}m z$-tGBOjeA!sF2B%-|Y(&>)skGE*VJDuZ=iIHq_QPtebb7KPh6!^L+*mYX}yIs?22a zd=<*0N|c>wl{W+5;f1p~<_;jhUv@JE?d4TGkx0t2OMf;#!!@7Dne{QK`0~f|l2xnj zXUUL3Q4KXCG)}Tp}tg1T92=X6Ru33EpZzU&{--T^FaA(KB z!M@?{;nDsVoc5N}7Cu|$LVZMBHMn=NZlnZfS{S-0YJd?}tU?Y6&VH@gPHTvOtQ?t5 zHlcxRWmzF7KorqJVYbY1RnheLDEmD2&>2qimC%2JZ(K-oc5RtA2J;`q z0=ZZ?zg16_r;_B|#hla}Qzhkl(fkUxR6oEeWq~n#_fPL%as09Qn`&@gR+;?SjOfYq zR3#gx#aC7IUo!sk7+^IpSm?pTGhGs=dMH9m;VkHlVLp8$rrK{E+=P=|3NABTb=Y1US!_@--?JP zHJbkRZMEye%dpE1P>+p2-o7#ap$Px&Dyht<(VA_IubN@#U{#zXsAi0TV5A0ItXs%c zn7Jsil9_)}^A+#pv9_gwZk|Ltvg@I8e|xdFD_PSp_Yc1oP!92pKOdSTMKxO`s#Td$ zgf-1aQp`gol|hJ*PS>!Zlw`JwQpw^_iYp`@YmC2;X?Z4V&SbS(^GHqfcdBr4q+A+R zUHVm*;j59{V$LZKt13~cyX19xr?u79_}o8mkJlC%$&^ggoqc_7(HWJ2&i?yuv;-C9 zRY%3%`S3RvrjUMXK>R889ichF{mioxH`z3{#la*5; z#ol?W!Jqk<_Uk$>uf#s+nwT)0;K|crQ{21m+vEKwnbYgERh05_F5_VMWM_}_w`Vk~ zsp!K~VKWMlvymqsD)t?%^mQj|8|S?3t17G+ulxrH)i%lhrJhwogLpH$eAMvW zzG{{RRWj+rAjlxsvw=|+SRS?Jcj~_eq~^xrgW5ShenLP^ciE!6>c46sAKn@X7kCS4 zRzQWsU*n=J%3}EA_{Z?c6_qqmrpMnY#&}&YbDGu-7oN}SnQ77;PkiT>kNYFr7%Ek;DpzBT)(D{l3r<1>QiF!axsZ`6u?2lt z$5C}~;X%Si6JHHf9OF-dIb(z`klxYm8N3LvdZw61{Mo@Z7@d-+*ex@hMM%`lMsZ!S2HZJb>yrG%@{Ubg1?7YdsknnHZ_~}3&m>R4Vq-7?ZrRWzY@VD_%LdN-9 zeC`}L)Lb*OrDnE?VQTwuL0*|A(NHyma=av4RVFH0vHsgsGXp*`eIzPH`5lw139@Q5 z`?@}~+%j47urpg4Un(~~{3$YMC|0wj4-Ju__>cbl_>mp_FnOS(Dc{QLpB$gIkdM*E z{fAaW@maN)|Cp$W>TJCO8>H~5wy9T5SoqFg8TjE9 
zO*0WX1`kOK(?=wMv>m~!@XKf5lXK6;3GdwgonJosJ@Dmnx z@oy$-nE3|m)dIz2`53LY%b`Jd;Kv8N@Yh;sGO(L-%{u>Nj^k}N|Iuy&8FAC@0JLl* z`FMpuhOa7=4Yd?Bgt1pgyjrXl$g(Ocke~}Gd?`FBn*|ZO^v$+NfNBWJ zqy%OafMF8VF$o`Ts((B_+tCSbI|HckRu?l3&}xwy!9W({Z$X}ljjMq=zI8z{BQ(O_ z@jV;(@Kq9R(P5!WZ2I^>8N-)GF0?|e))a7+s^mDnT5Ao9_>&>9FM0hkg6U5=^x?7e z!Vd9|qrYHH)BxXDIDgYeMB?_14oV--{d{QBmqW=W{RJ-MOp+43E3d>p$){2sN5x zWk@)%G!Q35R>QO*$7QNv$(kNoU!W%bt9DUUKK0*H&k-IFo2r|sTR;SDQw7XHUc9zp zrWu1Bc5{&NFZFk+f90zAVN}`f#BqGp&1o7tUxPUCPt~*tg_nGvdgX-v>;74_{G6&2 z&f(sizF*y_dyb@Zvg?N*mLb$Gm9>7arl_6Z(+mX>HhIZ-InAXh^wiJ9$P;9ONTK~e zwsB6ni8CXCKH#&a{!0=kSQ=;3w2_Ye*d3Ckj{P1n63sJ9{XJMG&|k8v4apWgZ;x6> z+xP1w93ZLYpnstc_v|>n_?UgWe!%OfoDB|k}S0C%hb`TLf_M?D&h(;;XBz#XZcQD}t0TmPE z2^8i}uBjBsrZodo7?>V^)xQ)qOlCgNXL|gVf67~e`Ge_WE+jXWr^=0_Trj@>evQAx zYY~RN^(V-X>5 zfBZ)939dF%U?sh{ML%((`;(#`?D*EJ5t9nl+ik>BL8BLTQ`Li4LW?h>!67j#A^{=d zPsAS>A`iuNQbZ9$j=Z=1(HV>8u9|z^t((7Umq5n&&u?5tw!Wc`QbpHD9@_o$LtWcN z(4q;I7%w&N(vx0ANPYC+eFr+X4UF_wiqePMZakny2ONp$q-iFNFUhg=u2QH=-M7(S^GCeeh&KI7~w-o}%; zm_RkEBze`*yw;Ua%_P%|k8Ue&xYeYnQdu$cEShip?b?s|53kvROl?(@@|$_BBKgmP zrjuX}2`p#$iMF1D0;?oWp8Kr5N4GP7mVFV;@bRA&GuGJnlIrX~WS)tVuoN!MaD8l` zQC+j^mobV%{jSHq4?j{Q0<)JyYEkP+CCvk687U118ld4fayK&-u*!iHoXEqzvS+}b zY7t&mbX*k$pUkqalul%o1_X)xvl^^dJR|ex>G2mspsL#hbzOh7M}MiVPnla5w0SU| zgMC=0kALI(NF()H`KLO7xiDK=(N`27q_7nnKYz16AqW3&eEgdU8>Z$3E%O!g5^t(? z(m(}i;vPgcCIog~hHNf~lmN!WQaC_2sbFqrnF2w{&0t^$hd87Hu@UWxQ!m7K{b?zS$OT#S zrFFAX9AC$ub|gQ+zQ@!b64stEeAD0gYp@@0-#EV8S7N$|P72$`^g%oYtk8dy#o9p; z4MCbE=;T5`f(BVHrD+j=m0&lSf+g@DKuK_YB79{s{<4sEYb&k~qoPs>no4GcibP5E z3SXqmhtvpGge6DpdD}x(vuJrLMWlaPr2nzR=SWB%Q&>f$FR=!W7 zhH%5T0X5WwUF7tF%0{LBiGT9j7SU0`(XNB}1*cUlT`RXO^O|kSTRuK3$*L6>Sgjbs^dc--A%%61L(B@xi-ReTyWKA6>L>NR$ojT<)@arFq9D3YUa8$D3KTKs>F6CX$ z$$`gjA3gYJMGq1dKC_{5`dC}y_%w&vi`>Erm_;~(FA-VPRvnNq1rh%u58GR|Hos^! zZwunt$q?k9Jy%?+CNuf`;3OcOA7n+gdNx4z$9bUit|W>f3fob zeN`@l(`mQ*FWH&MSpQY+<^}p+ZP5pX3;RvFktr6D&N#=61R_TT5vDsuRu! 
zUjn0lI6_wj^izCtrRuo^x6?@4b~2HhztZCme2j(|7Q6TGhEvqY)YKLFdz>&jFyoeJ znd+(~YsgpX*hdj*tZ<~(iodQmYEz}4n8{VOET-zdhi@wPcMu}ToH~y+r@O+u}MuW_7Q^S2lyS2`TXF>-nIL4n8VxTf#r31wYmo!Dk-|8DbLq*iBJttfzu0zUgT@85+V0;}?H)#?594oyFMZSkad3UC z6d!y~5J^0%WJ)~%rN(_wl z?CsdLaK=gJEWh%e9XIKbI@Wa4BvF;AzHr60)X%A$&i*z^xP5hIg(*fAP`dI8FSxPS z2yuo%G(g+{Ws(j4_ zE3N_mNbm0M{&vTgrT`=ob-4!D$MLOgx-Lt$`g}__FUGRQ0(j1{%lSPN<&+?O%;PWl zr2Iwzetbt;v@sRF31ZkHVU@|`eG>9c&%X%-T+uYj_Q5_`8W&Irt*hH;L#6oR z0Wg$cg@x@1pG`6XMWVW@eJM@V@+U9@2=ED2c03>)t3w}D=lIkuWKmL4MnSeG;)AQ% zviR$OiK7dOs1UPgjvKYGNz`$CIDr@O2cPT_e^m`kz%c-_+MT~}jg@wx4|(m9tYMoz zaeT|EB8DLTjxWjGZLbCPs{FT64PM)=f@t;om<_+*OwGt@`y0x}}ZH8%}$w3tLq2t+6*H_H7^!h7Q=LWku?7VO0!B~>Rg(kX$op* ziU6QGOEba9#iF^RE(hs1&xJj|rtVB_OZDmN=m?^x>sexynYXlN#j}kc22{wR8(dH+_j=7e@+3t@c0N3?{#$+PzukZLxBJ(Bn-_cc-Tke> zUwyZK{dZMEA~CSxr~T`{GqB-S90%DZ>w^l;-|@v?EtR0WFC*%|Mp7Ij{f8dE8-3Eb znz{>L?fR&T8h=_>bKW(nnr6U;A6pMwtXv-G9^SVV?E14`lB#1D5g>3ptz@d|&wCAo zM~>_)^&I6os_hjOsx~t<9gDx?V;!U6OvmgCsgb>#u}8XoR@G^jz~A+!)!c@YnX|0= zgbTb|Ilk4KEcSPE%Mjw!u3G2%NB9kwypbB^pXnU-l`HX+uSf5Od^%rKbHOVVgzW?g zRiFq?S$qC9nYtDneDI0&j<23f47C20u5CddaLg;@KaQVkU8vpz3H9`3x~9JFOdM6N zuD`%=Xc+isq(3P@nXg(0H~q)(&AyR;IzH1l)ypo6pU+(X1zzm|*_TQ2pJWt-udYfW zT|h*%RMh($8(0T^@%SKht(4Rbii!WR#uo*KkO)v}uHjQul~J203dp{v$KOC7TyT2) zwT>u9_6{j`G5?%;eDwGbjPLR-D<|jk{u{X1f7|hw$i)3VNv&q80c z)5)}quEWtP#~uo0>ZOWJ9CS+@T~g!FO?fzONpiHj&-?F%CtjON*Ab!8$@DqPE_?oI zZ&oENl(Q>90ltmDs>Jr_ju$BiCjFfX%|8|C_%9yD!X{{zEkz3~{)&w8w?JW`&LaW4 z%ZYvM+c}J<#Ics>A7EVr} zfd*PqNOOI(L!C%5e}X*vW?O&>hL^*C`{y&>D;+l4Ey$);=J&+~fDUV8Zzr@q>82>{Q@GFqPk9u+@{G-&F(J6p(3k1)7xm@XasWORH>8iW zx9-RMA$I&_?xznqiTd!w`HALj78l0NPry|im_GiMauQbRD)Ol#nVM)89-of+CnC!~ zWBRKr{8t_S@~oM_HN;mlPxz`=ii7xT#lAVzv{RB~Qi^4f_b9l-))ouTjDYe35`?D0 zhM`j0FXuCHrp5b;D)6Te~ify|*W>WK?r^{cW3g7+5`Ah5oKhWPTqfe3Y z8e{y$P&=Pe|4H~l9OEC_SG7#?|Cm1J8=gn{SjT2ulw4e`5e@vOQsnK6O4LmXYLby; zOVdI>MZ=pLMoV1>Gfi`APhQt||Mv<9c8Egb%$Aq@ZEnUQMnto({0{vE%-*0LCN8=X z=lEtT^#Ur=ebKZj;twg;$91z=T>pmK>MwX5FWG5+;d^*8?$FlTnWg}pU%qpmxQ%xfx4aUiIP5@BL+EWP}Rq&$+7cyjP+B;P!h)4?PJ7F~#@P z=CJ?5#_-uAJVFC0@PxnkOrg&he?Z``1A8?D3Dh=#;GBhm70HQY_bnfv^QJHGpqANh{#^U_{)&5N zj9el&Z)wZ5?@!e>!npgEPw?aw^+AOXe!1RLE(|BqRaGZmG_?Ie>%UXmmn!5RvP;0< zR%k$C=+{3hKKs@D+Nk*j|GQi)3~#-YtW;gI;>_k3eE>TL@zWX|_zy3iIA(qj*nX^mm1+3 z$j4!WS+AimuH**(_3 z*cJL`)Xk##iF@8e|1<}V{Bsh%5XbP3-+%3%1?$JEoGF89grDMtP9fX90}xcV!~b=g zrp8BY8(TG1eMA3L!h%iW@q-U`uR6-pL)I?7?97Ghc)TvhI{X-?SROgjvuA$mN=U`= zZOD~u{CqjW*Z9k#jN$|%A8dWwywG)J(Bn>G#Y6@K%c~NK4;B{%fhFLZN`B}5kN?4i zAE>WtdfBty(?8rfR2XQjo5z_E*x&!;&kl6$B(o&&_8FEo6*tsQ(?>M}Nd=&4<{QKJ z`5PIh)F&{DM=nJjjYq0Xa{FU45U_I7me04;&z(^>kB1F&I}hi%II?f8ouA3%IFHjg z(DvhvpL3EboJ<_(-g*0D-&(io6;0JM-g3!@dxtxF297q@&aA6y5qPC?&yFAMXnVvo zmx|Q9_27Nn{==;I9hd)e|44VaQn-KTEq87EA#a$D^goV$U7tdt_>+xac__^coqx(9xFKk`0bk?cwzT#in`VQ9Q zYg=mOBEf^ZZskM??a8Eag>pfHBr5wlcK*lXUw`h&uWqQC`KAj$)IZwYGtkypH>*BJ z(4GG zTD$alXDxZ|XmL25&QjM%VW_=l|GefE_>j$-a5G9vt-!vRQ~byLpWQM2S@(bP2={;2 zhho8|xr3$@e9Dy zgvWPN&QGjc3zj+@UufV@AOE7Bi(>jmGqXUSWBI2YUx)gu$NkqF|8i=^$kN}Lg_QcKhQ4JM-fqs4PR|zFo3=HAQ#ek8V2biV1Gfqj_>+A zf9XbfDJC8uJHG2LY!GaTe<_>o3-Gm}e<6QX43%xlM)*3y)WsKePOz_KbW{~M7Jv51 zzUB%-4e_a)>rc9o0Rh!w^cQJE6!Bx>%f9YoMw4vg^cRLe0(|+86ybA9{7CixCrF=chHFkf`K8#< zqxbaO|DDEXy{u;W*;ViPNs$M&)HbDRx!ELH8tm=5^;4RF;FNs1KJr8wFJ&lhU((X~ zbF+&s6RM8~`gn2IXN@mb>pH*H+*ng=k1?wUiaoq!<;#9{6e9pt<-%u-8?r_ zKU3DORC<2(_2FHcXo*we;w#T49^7_M{i&DMpLKQZnyY!&csgHu_;YWljM!Tz%)SAC z%FEm2Fj4|ru)(*`%3Zg7a@KW!Tgm2UzT|I9&+jaD9n3b*=jI)%E%bM0>zdF?61ja1 zq4&8jE8-Ml2Ldhr@=8Im4uGhSYP)5kNfS&*Bt zr14oVsb6z-p?xpSu4-MB$ntF9GQVrpS8%GHNeNWEfWPy1u~!#vr>fc@r6AT@Qu~<_&YuRnt$S!c#!#ob?g7R<1f8N 
zgSVYw(3opiAT-vPE{ZX_9ze(8y1#zslCej?5(;g)qCTwWYeaq7#vbC`&}jeH?)e~h zMl5Ju!6nm7ItzAN-@c!1`7#euS=f4Fsmyx}^*&_{Zi&QbsgTc96^q3*H)|@vM>9o( zO0iHXlg^_Xxet;Qy1Cx>1F@3b*UVl(baGtdB73>h2L7Woi!2I|7Ed5dqmK6O{`_5k zcKzCSFKAj;U)9QgS~yY|yk+Z`AA0gOA2PwW;AL|F%8QVVzorkgIh8}DHd)mTnQ)~M z%1SMGf5`@FRKnlo=`tzW=zks3JP1Zbi{({9@M zxt)i%kP9P2NXOO5%F&*ETlU?1>Vor_G}YysK;Xg26d4&Ez$dRc^N$v^Ea&`BK3fOA zd~)-bmd`qULGwy?BvjHkK{0E#>^qhJoBps+#tHr}$>8oYZZszcgP?MWw;x|9k-zEj zS0EmB~#A1W>U%8w7M_=Z4t`8ZMtQ0@KqsyYJO~l;MJBVu8K-d?T~5` zQQInH86}eMw?utNvhnyp zjXyc2#8SBc@HcwOB%1c&`sg{H)>2E;9?csN z%JYtmO;4eZ6+b5a1brg>W7$_c%?qFtG&f{2fq&e-k^YV^@1c}kLAK_MeaKIy!dG(W zKa=%;I{p*Y5#hW4yFLNF+ZDI6fqvHqpO+ft&@}K*9c!t}U>~_^oWHd_sKLCI9MVVs zWJG*kZrXJ5YxtE=f;a4#|CdLH2A;U1`=(DU{EJ&DaPV`lD<0ZybtmdCeqGbLHx2LE zc=XGEY~03boAri&&M!Q*^Ov6)xcfWu7`w}Zl|;UF;d_6|mB52vc;o2a?fTJG=s%fR zi(0SyKyLQ($Uh5xU4vV1>Am}#3*U1SMAMl@4w&{wu)vkUavzoAJJW*$~fAh`Vw|pj1 z7$N%v{OqDrcph`Ed2zh|7P|Iz{P?4zyEi+&Dlz*)D!1_DS+D$Ps=8hRm_GYHa#dxp zH{y>eC7SUkibee6`Xg(0?(&xF-k+bb1P~K0SsLyg*!uIq&9}{d!)Hl4_>b3=`t03U z<}l#bo_=ZT3*VE>)`}0ZmWreMw{-sCgSD5uzW%fq^xXZgy|;hephQdRWOB_{+)%#; zHF8n^O%z9ZH=)L76Wsn|OQe5(;VH9U_0d!nKY-T`01kfig{6T$)Ni=(bxjw)I+dw% z`*Q2h;1hRs{qz%y{`?kl9{lWU3Ws)&d8Cy!4B9W9OprmIa z&+%F!{OR#G6l9`4K3rR8oML=e?&hPb%}hwHj~N>kMwbN185IT(pmnO$Y-lLxid7{%c&UEBIy|hOY>`~4K0Z3W9n=E?OD&XU-N!E>8-L2=vbDK%W@x0Jr_o3F zVjuEXyRA>Q)X$mIu!!(|w0D13-w}J0)ba4EbG5UY7B*D3_6@Zk={d;T$|uBswa{xx zCT29u&1I@`sp_8L_TB-0nxyHl(YE?+eiG;J`l!u;eFJ^)A5MbIYgxkirh`4Z;fW3l zW-Py9-N$IuM{j;}&v54ieHyA;W;HIX%G4a_-o-~O{v&S=$KPxs_x@|YJz@MpbNNr;FNow8 zFf+i{2;)CbIe#{*jm;0I$KUDWU-ex1r`*}ccXMmGmfYJu{%0Ql>eP()Fs`zzL$c6g z7$$$n?LThc7`}W8Z_*FQsR~tHgs1i&@cDrU(wZH` z-L#$PKklD#{3-O8ub3I^6ABUkJ%9JSWN8TS$p58;yS9yy&fhPXQJ46exj--+73D|D zK{qj)oIuIXT##yN1C`ebO4P|XqlbaicN&a!NE)99fc#V#I;Yt_`)+pmWy zBK*nx(=Br{vri}wbQX^6N{kGU%{-Q@>iThfB!Rz9PvAvWH6lGcX!Z^Hy8;wc|1~Mc z^Ov+CKAF)anMys#pI?W@pSv&sq#-v29POORw;p5?}LN>B5Ao zIz4Miu71{N@8QzXy_FGJnZ1yTn}3N(8$SL}*M_o4<-Dexa>7}GzQ ztIo_=sAnQ_;-|ZfTV?|NnVePCW?L6v{ZjAI;(_h#m*>|saK@O^H^$T2L?i8jF|Hs< zq>0&A{Ya~E|2MJBe=vmIQli^vtWYUawd}8Moe=-!URXO8A21!+wNSaf)YFdDe~&YM z`5f2n8{togpWXl;AH!-@tH=K-#+O)p!vSPI8@J3AMHPHMHWvNkj$c#9&*||ODMS77 zueg2X3%J_!_^W>R`#t`C;KrAaGVIcM!ug?M1QR-oZ#sQ1+mjT;pJ05{m=bhZg$+A1 z?C}SeQJ7!@7a@KUt+g?RuimP=8#I17e%ODxahCX_@rxkiK}(Jn1U)z?zL`rJq~Tjg zk{OP}KZtr3-x$9l{2rdqD`+2+|CIW;{){g! zr9pXo4)l3SeB_Dr#{zNwc7G#MYW#}uJw8YJD4;^w^w+Psou%ACAXeS$s{{3nj@{xg|>gb)9?f5z<_`Oj18vy z^tbxQ>^r6ZNB-m80R1!RDo)KvA_JHnfANiD#?R^TcY6F)_>qVFHuJaG{BV5#)$uP^ zx^1yR#$s7;@i6pxArOng+(AZ zxuXLsRUlaV6wG*hjwuA78UlPrh)y3!5%wsHX7+J>FiCdg=+mmFGKOElubR?N7N#GZCIm1eJ1-) za;%47Jfs(~T@V#k3($AN%9y`DAW z92AP-+hr7Js<{7ROZ=4qIEij^4ijIKpb7ejzoZTE)rOJ53b%T|UCke-zYc8CAMJVi zE>H4lKUio#q?~Xw{!`pvz>*fnK*~Tp#y;b(fg_k^hKz=>H_TePvAf38#5NeQ37DPwlc*cK*)&_t@i? ztb>uTp#Eh1pnBTDmef?US&PLVrG)XH5^iDXLp^p&os6nErlu`(jV+ar<&6ZoGZt_%y}+hXx@q zANK^ue?hRhSQl3N=S7@2MworNx@@Ri`c^I0AHd1PF23f^fhbXq~`d9}fN%#WxaVgZFhS)A+YM9E%B7W{+dWkZCA%E#V8Q=9`$|x@8 z803;Za2KBNWA>#^Ym`hQ|DlM68nx8yD+W5VO*73~1OIeP|P@y;RujX&S2P4>|d zvMQ&5vO5jhqP?cSXh`IMs`{;9Xquf&UX0-;prYvi9ADKkN9R@%wORV3{X~50Uuui-cYHFH=te3X>SO#P zeVEWd883h!pW=@WaIi_fzRkWMg#Fj>sUJ-GHRbHRNYY2MKs7)&cFEt&e}axDd?_F5 z1AoJ~h-)^|j04lvrAD1z%zi248owzgq;kKCo z&-vR+&?9}~_KoAaJ^-juR*iI1>!pw5yM5jN*%Rnv{TF zd`taT{n7Cw|Fpc0tK31I`kEepB`f-%n`E>BDL#IYj{ed7WP1F?-|!o0rO@|#JpK}? 
z2$76^)(*9b6^omeT=4$O*g?k}8-JN!QOx_7#~=KMpoW_u2?G_eKpj?b&cdc0uCq%{ z#y-Jsu=k!Xeg6j03>`LSS-GAmkoK4Tyr>O7ZUgyW|DgF3;o^3N3cBpD3jp> zWd#&}L{B6*1EdO7vBgj4FBYUxi;usw-u~$pPCe&Nqc-*)J={F1eJaZ?svd{$A^ZN3!f zgX!qM>d%fFv<5NUzN*RQ&r^-RMG46JXyEAfHDwhyXhvE$K`Cat|AZoYqQ=dLzv||{ z3=0!>$qa%vd>gXW{D?mrWPvaETH)LHE5F7UCAvP^GhTnukaq;*FFXDDLqfqlL_Dbn zAG!aGw{ILj)JK)$41C4?$MF?EX|C&I{2jki)J!?e8W{x7GR9)GQO zVHcLk$43?fWJz^GuG~#LKGj4Yzw$7o{y2U7YvDqAAq(1Ar_>j{!a9al)KT?0e*WfJ zRYp@m`75`Nd;c{9(1q0D6(y{H1?L|W9kq)BtO-+FVqo#W;=*rw{7dk1j=Cu9_7`LO zFX&*gYIL~415Q=FTKWI8_bza^U1fQ1lD!2jB#QFWT3dn_N-+XjAR(D^?{BR+#~kAs&v@R;7;~;$^=*ZA);*;#>H6#wwF^JOSmZXr zA3+s<4HF5PZWDZZnBapJfDM0Fu|+a{)#<9$M2n1(xN3AddiVxEBL!(&AG0WJB75t; zfM)tE@rO<{U3WB_Z`dwcs#aBPnyOW+s%ELAMr~p%s+1CxT2-SWwQEzRXq@AE#-bKm!MUH5eh6$!%cWl}BlAbusXh;K{xu_r)q zaw$Is1={;jC(%4p3rkG7weD#5pbvwjVx~T}e*1KI^I-)rg!pp0g_{fi@W9Gxu>Awp zg%^HQd_8}TZSRc)>3jUPRIFE#z6WrW&sJnh%FsIgs_BqQJ7GT~A!T*PrT5GBOug## z2D96!J9d(1ZYpEhx^1OPLLnrrld|0+1A{!hoBF~5L=E0ot2Xd(m*r zOp{K>9eR9;J#%8dVeTK@!T&_X_M*{xSq)`#r9?gN(hqey|2Aq=hgtPM)9^p2kHbz{ z?6jYWAsF!`*uAQp+?B+mMJM3fTaG4;0N3kkK#kNn7cVk0hondnja+~5INLM~?AVkH zWaGWC$Qhx1vs0SOKA^t#K#TmQNmtyk47t8Qm8YT`@r%gA(&JEc_8a>(hi3+{-NKXY z=1XpaxpSEkex^ey8L`mbq~|6QcwM@{^LcZy`Q=hcP}r#Dh0ZW%uwz!|xnpxe;xPLm zeEvRhCBx8rMo9q3AO#b-`$5W_*n6Z2^7!0QbW!49MlmTbvA^S7&dT&uN^$5!JgmI&AOavyv$CQ`r z0O}KD6ZAEn#QzMp^5XEHFA#6?1vayvM4H8?e{u{oV*`l>Z=BQ2;G6#x-^;pWX$6$6 zqUYGnSpGOU7Po#0`>#)1XrPV$Im$Td>#YxSB|cd+wMkp%;i<`gezU52=SmMHXIIR& zdRS4P7D;4X?cko;V;$@~xe@S-M)$`uwNUdk9Bp8y80_Db8PgJ>?kxsvDPMV5J9sjO zT@y+&jQd^74j8@E7Q7gMIHcd#{P`gHoL=GdBV_#`EnuM{9Pd2>kycUMCO$hL(m`fv zxq+qhz@Lm)f=veuKey;UPN1MgL~rVQ!C&{=lZlF?qs8Sf5y z^^HS`=-yNb9(r=_8~10Q^8~cQj>t^sQzRXMqN&rGe(p01r$LGc8`n`m(=2}>Q0~JT zxgzMHBz!qVRuvvu->x|)~7yhbU^=8hELWxS6*rS541MTutg9!0j!xxdp z78E>RfRjhqL_{Pu5=AAZnAb<@Ic1r8QeRM`c$>B<9sI=OaY)}0d&s?xJm|~_A1%nr zF<{$MlTgaX#%@Jiaz9C~FI*?6lYX|Fho;H39qNK!raON=<|+VC81zbdzR`=ronPNz z+h#gC*~^I+?k7emcixPnup*I&SX&e7+ov8EhR*`hNrudaNfxqG$!$Z+$6PaZ=;ay# zlS?&GkGnAd6__RT?}p6yvUWl-!KC!&IJ(|Z^~jnS)gA*mQ*v{+boc+E0BvW)$pWn7Vy?fn9h_ z-p;K9OwG0S`agMoR$Cx+P4dH85-&0_1Ef$U@wvl)L`8-X?7ZHr=dwezCR85RynS>C zjc_rD9Bsk8ZvKj&6$yJ2bYQ83nyTC;E?*=>D;!&|fkKNr2i4PX_m<<_?8h&zM!ypN zo9mJJUe9I5+o_H-K;`K%cbQ0LrK$oxf7J|8j_f5LL;(Y(=4O7U*e|02l2thKqYrwa z5NtYU-dkCH6R|Yjgg4L8S1E1=^vW`E6-wI;ES)Y`#_Qk53zL4h@}7uv|4XCtX$S)^ zg_^&)74iP_+3=v{Xp;Z8!~&UU=xH>-D2ccJL+?m|eBlZikAmA(cDKdvJ#3;SjTs|$ z3wWpg6EP-ATNb8|AW!bEZJOXJ&3cIvGAtD+@amu+u~RU2W%k<0O3&V0trC}2K2k9A_jHx%5&SV3Oo8l{H0;Ql z+|A@K2_fW#{B0In>O7b6b+doKjTM>0j?P~adsw}~nXDKbBcVcT;StLdZexAXx&RBj z!6NdR2PJ)}I^2#yD+kL+=CU&L z!*5pR(r_VSCU0k^oC7gVU^x9`oY90RMWO}z5xuy0)L%Vf$;o2@u$6rP--%^<%tAWl zOVi1y5U$iU6-1 z9eSH$N$SXin(YVn+2Dx8kuZRc$afpQIU-v{<#JtC=;X^qDya$1e#&`F!#Tn^r2+b) z+ku{I-a;(+_WdK}o|6dmI9!J>sCA*aMq~0Cz<=p?j9y9y7RkfBwsL;bzkJmSF8!=N zvllxO_~#FV=?MA9fosJN@ClMI%9_?4*ov#;uva1t3V2bDD3ExBAkOdY=9k^4`@|tW z|A^5P^OZwXq7ti!QB25;?@9m+pa{9a)%C%AxqgdZMqf;}`t_?8hf(TwCl^N+qABK8 z4uao)z0VhruqAs66FaSL0d$ZP}jy|p}CmC@6=M0C>?aocDw2Mz=1)*%gVs5y0mPnJO4=Gt1D)x?C#2EC| zho?Wkf-p+mg##k(0ScyFh%i(vB!C7;@QFbZ*@BiPe&7x!U_%INWalh&P6x?LEFoWp zaZ$lE@b?(#3Fk?P(rUxTT=Pzf>~L>;4(U&O+sSxt5s4Ar4OweQymNuwK?AW8jl%a& zQjHx!4PNBV|Djr4LJ{2%d$|%5ahgmgxj^i3mqwAU%nELB9m< zmqNZXa9LeRS~gGQNk>>IT9Ihb#SPOnZgmHQ2gM9d zlfBEYJ0m(s_a?B*4(L*%*h%8qE=ciW2 zj0Dz=s%k0L<$Ub{kE6_HZGbdhJBsL~LvzLj=)QUZxo&((@M zo3d%*ayyaYrhV7}s|-$7pH(+6V_p5zguz{bI~(s&LOPiu2@qe*{d5RNinFZkn$>Zo*mmKrW1j6>=6#x>X3w!QH5(}_FbPtrS)G~ zd~(ZwLBTQ_*YD$3h z3y>A=lfkwLE?*o`;d?RB)K{~Ej;q0l>70WMJVg_?H2Q7uq^rx#M=&BY_JLWOUBnL( z*5js`a;^chYa`A#H6GFb=Hsj2*Qre<2C}P9>0?->28|c3Meb1*uHXMJzUDzfaNOvt 
zL%+Th2DGq<_1&+dM;20Rvs}~+Vb>92)yD^n@SA$@xv@8jD2LW)fcZ&Nlgid;!I^Bk z|Mdw)(l$w6CYayzXRE->(|??YQu1z}%?!y5K~BJ@w~d+2GNQrCuk^eWxIS+Q;SLV0U z?S+GX5@4IAS^lq>i@>0CAm@VR;UJH03BmB6>__vT?q z*?`mc{|M?dToYYnW2?-@U|aI^4C0YuvOpa)g{)mO1Eer`zvom*b>r7iH9_R3}|;yt4!qCzcASziv}E1 zC~P&MM}oKVSW-vt2vJ`j`AIEpYZlLrh!%=$DYILKXk70UG?A!+l8@iOY47Z8Z%n|b z#CMAF_BeDZm-;UH*1D~v$}r#Wb<3g0vnTP)nz2HkdT9OCW+O8v%5_}08l0FPWSK>~ z$3Q5vmtzOgy#*Q*u~q8Xb8!vdBA%R;wa)O&H{r7_2A#~vz5}K>5H%tld~wKK!{v$7 zjL#u#cGFjwI1aH8k*0olNf!?2*jIr5K2=McRFx6=23KBgMMJZ8?!&R#%^xLpX%uEiH~(MhnO zsm^HtLIbN!>JJw66HcsAm$?%5c;YYZN|zuXb2%?E%pDMpLihtk(0CqrR**HJbU=Gc zx(i)Ix)T(yc95j>6FJHX`w!YP0Y}^CGm{uUtaJEBv zz-0M2!|CFfgjwx1Ve%Z7SQa zE`GOgv!GG^bSwx9*Q{d8#6lhucQgp}IC_Ig@vX3}v8W0^O;ll`ABZu3RE{)(!k%xU zXb|5e32w*enZBJkiHi{|)Re@shN44cljb1or%?fT81V;w;~wx0&NKz13AV8jOgQ0+ zM{54));1K_nMtvRFySe5d&F$6)*RQ|(A=v9FXA7IxT=tzUuQZbyy+i({+2}a_Yb_(3tFKt|- zc+>Z2Iyucd#PueOcJG}_P9#!|ka5J0{GBdwz~DmkUWpHm&Ae;;mI$Y9cx=j}j5|v) zkAR+XsbF>l$7!0s2T`EMcN`1en>HBw=GiF)h(<|{`0wty|E$QVbDFO9+3PpIW=0sa zP%B0Jmv!|!6+Q6*u+=ip^kB>Ots9@yPBEhU(d)v+lXMh~J|fpeWAUMMxWdkbVN4%e z@K~t_>epQr3_D^#LQKfluwk<|`NCO@f&K4Mh|u^%_l+_mqKU+w^CW(8B;;Q9%0TY0 z?+qJ=M?8;^^3ZZFU-wTbzyCt|5>Q1xE?$K`Jr(u_7MzqvzZ$*5>!}&GbFo*Ez6HD@ zMPfRKlid~DBS>YJ^02>LL7V}>&$!OLG8oX_Gj{E=y#jQpA9tydpA$xHIJpA9CDHX@ zaEc~vMiaO<#R{Q~ylhF_#7p8eaiFenFC0Zk1^A59_9|EP2ASK*em^+F5X75Kzq%jS zJqXSvDhLe?)JucXuPY}nhSn%xmlp6ZFWCGp21a)$cExnS3)Ix-i_bR+qHO-YuaX2P z5gt9X+hN2oq`b?l5qxb9Hch&xk08pS#j>R*ck zo6yTT$=hxa+~{@NsFV~0J?s~VNCV$i^2U5on9qSzA1k@W_o;hp zRtC7ar|19GWezJ8HpkU6G$=Nj`=bkPrXUn{lHE=In%+a`(xJVS z>|M7!FpsiPCl$=Wr`GpF_Ub>=cb7mfdYlzAD&=suEH}_~{wz9e<>1!m)nK0LV@V(d zf4MGATK+3^qhJ{4`)x?&mgP~TQGT<78YLg~TiU-rCaasyt!qDfHp&ZrJ+{{Wbo*|| zi2a*zk09NO&*U*we_)kab@XdqoB0i5OiZGL{PNx4NO^Xw>X89_1A92gZ0`0lTG zG7ogB(~p)dA1ac}Z)os<$4sT14W&)g1y9=^pjMokp2`FAyw+bPSf}gVarXtw)(t*$ zY^1D)G7;#}sdwZJxE?AmZOmxf?(TL7+wf}FL6MX^)TrKh>|$v%&U)z@b=Msn%xWIH z5vL{&tl40mU{De0z=eO^&>+02r8=!`^hn6TCj#+bd}(WR8|i=nY{0YdhpKZ_*M-*@JiZV zZex5`y|!pkW`f e!~8pu)ic$kY+9y^KR(W*Lg4z-MOFtrz21-nNQiS|A?>Vzhl5 zkEl-#>H8=y0mOx(lcLy{24$!gV|O5oNE>uOs#7t@Olp~*m&5Ia5N z+QNMY!;hMd|JLExa~t_?rBHq5jdv6ZWVufYkQ}~_b{#3T+&|R1CoB@k!g?EvAF({U zf&TVqgHOcMK{p8P-PzR9%oNP=n7F3HQmTK*T~7;E{@B`)$kqa+#1s4^6*x}Gxp*ZiAy?LP}A{{jufM! 
zP|s%7TQYLuKS-D&h^D(AT7*qr@$_+jGIXHGF047LDGa~#)d~ujGSeIPn2CENJ)O3qguN|`|4f|71rIw#lUzQa&gbX1H0lYM~^6> zY&B4FjY+r}_k}7Ep1W)^Vc_J(4NS$gjo_&i!lfm8H0dwWzGJV=pKLrN)W=xBFBaAz zGc;NnEWJIZ4nM#d+tB8|YM=kcPGEv{9TteD(iRM)$%7m#p3M{LkXr|=2JViwXM>2F zzP}_db>DV>0_0ZOqs6H&j7^Zo%jw_JV?-Wm(BxTCKE>!Q` z^E*ZuT#Zx=_{dIB4m%9Uf z92xU%(g*_!=|d%8zs83#xF@BAO!n>)o(pRMUwTX#*z)%c z2cjJDvuf52iLQeEv>Q)s5UU)*3`Iw*hujE+wot5TzhaVA?)pb|PpjZLY>mqeSgHeF zVF-5`ZdI>USQuJM45)ZNS+Y`rSOfh#y9LP9W}X*0i>VR0$BsZ#Z*Vt|RXYU$2KxBL zJc~K*VP#ds(jWc&VA3%i|nQ`nO`#y^~TABv$18S&kmLLw3o)2FZEP&ci zjOENMA2$!QQYB`dj9kRrt|rp>iI6skz3tmiS(>@!gU9v8nid|4cUXX0p+FhVjGcoU zHt&C?m2yV3b8j9%I?h(%i;kpmy&mi7chB&;jW^PBw(m#E+mTOx4uXBgqIm?LeqWF` zxvXE=O(0v`HBLt^CAK9~y76_zU1^0B0!yEAFx({danaG!BkuVW&dApvepdBR2c^T( zAo!fOl7@OI2G@%z1AVN#avw6x*&M0qy(TKcGA|~sJ^)>CP1iO(_7h4G(mM-f8Nl>aVGl%k z(r6O4WUIrjd4mT2Fivxtwj{P%h%s$QQb+S*1mT39(0qC+YzS5mQ&9fh#&i!ij>gl~ zZt@doQ1FJ2)nm56CevqNe$N`W25Ar>>7}z=PL@Q57|f@)u0gh%F?~Dl8aqh3B;va& zvP%T2aAWRTgo7*4HAYQzvW=iN29hcGINxVVb}r_xXjCd{D7dzkF_i=q8&umGm%{(1 z&TjYu&yefQ14B0qEN6CXiM`P}vbQ zbiBeC_gm3f+PIvuzT^BIm&rh}R6nJGPUhf^4%5xwOC?!?Y_+WBU1LR2k4C8>N@XKz z8z?%YjM_KyyvEu;S>n>b}>8J_;DL_)sUq}bq@MHvV6;;ZFizU?m{OyYG9f!%29s>_ZTfO86tWGJ|Eb}NQu>bJ}6ipL@X|S0|3D%29(X0Go_nPFklQVa_Q~6mi zijIA!2_1dFy1~{iZ_XIyft(_nP;Ju@z4}t+fx1vvoOQxa`3!R5UOf5=8%v7braD*K z_Q?YrfG}_oDZZ1uFq>4sH{kY6W5*IiULDzWj9!EM=*uAGLAMb8vtf*W9Ec zk-EPsI;@h3YU6A!iwm(v41c)~&&SxxH0fUYWB4U@=N5n^P@wtY_YB-;%4%Z?cikL~ z%3}+5uGceAkjwljGuL+pL`|H@Yp<=8_FXn?=jaCwA%bCUu)p(PR#Gi^YY<~y59ptQ zaG-f|jZFjW(i3Rntamep{`KJ~6>*9M3TF8@AD+O!43w!0`n?mV1-D)6&kPy7h;LBb z;A4yJ;o(k=!E5>R7LlL-Eva?2O-XfqLhy$DzS2iXeCOeQdyXA+Jt`yTa792L8C1=o z+nO343wphAU85w&yPK7bbZoDNr@ONs!=xx@2(D|8P*@*+SX2qU=P!CYj@G3X-v~^z z<1D{8Nu*9b{UoUk{5++i>&<;u+_bP1g{Tw;dC-({<*)`H!o>ejAJyypT5U7raLY;ML&rRxaZO+e_RFljDv^*#CNX|K?e<&{ zjhPkXne`1Cx~?$hzgKah&Dz>zbl7~H4DNws@QE8JXd>x(S;|M}+R0cK?wg>*!Ve*$ zCc*ie!%VnKZSE#i?;|j&LyNi zf;wa)@$~TOSL@)J@T7&N15+de>1vE*5rtYJKM6B!CZE=Bv|@37YXV<{VbLm1eedig z9+z!rbn3Xi()wgE3AFk`%@JF~?irVJ1`pP@XjRx#D%{wM?lac^{oUB)X5rUV)hwTx zCK=9cPXjJWBcAWRfO}_|karrm2ts4?x4TA`D0W$2BkMfPwSGEuNC@|iFn34xo?R=$ z?ucLZ3M~AXpZzDxs?QB5lFAdT7qMU7IY@@|IApp1KA9|q}~|i&6+Fu zptdZ3e0dY~Q#JK$KS8#&x|y%dA8f9ugJuAKbQ#^lEq z9ri|hjitA$o!Db-uLsAeCePNX9KoMr`cC{{6dR(wP<_pZ!GX#UBKspEqF`K)R@6UhxQIXzzsKDo^q&8 z-3^Cu^|$OCwC_CnCzaZ0j!QfYF-G*I&_&%JyDV8xrwxvG9z%ceXY}wvA3@)^$}{Xg z|BBkoglTy{%tFY%HmgZyJmcLmAzFeS4P9xbJ_If7Kz!ld^Y7RMcjLQy`Tvt&oGJ>=N!BJtnEEX3<84($vtqJjn2)Zv;p*1%a4O#qWbX~P%M>twyFtlFf;W~f?KYyn31Cec0%q%!Mj@KF%>$m_! 
[GIT binary patch data elided — base85-encoded image payload not reproduced here]
z@2e7pi)ZOtI22Oxg_;|SiAlVHNmmHf8d>T%&(&8kHyjIduVLrk*9hbMDE+?E?}(D8 z0*h-M*Mq?x_}<{))Ux|T#u@zYqm2y+X)kbTD)_^YX>EM!^5U`Pw;!C z1{IE@Eq-xgBc8-hx_X1!4LvTLB6ac0YQRd0{jzSW4*mzmCt$kwLhL%k)*Ac30$=9n&j+tRN}uX`L@WpNDDjQ);{Iq?ZBN_}lYpw4dych zSh$D0?!ABG^xKHM?7ZvRckWf{Dyx5zO($ov%alX3fYq|*5o6fz<XF?z^kP&TVFEBftOmC3@e7uVoMM?uybf1K7=CaoO3m?IQt} z23V>KmuFOcln){|BN5OJB0CYaK8S%OYPzqFz^cIhyl(QAaR|)gVA8!9ND$aXk|`fN z*(^mGmhwY;$bgjXaTRz5E|qjYQOFJ0dn`9kW|_B!R{0TN4nVfrkvmPL~g+s(IQ zbA=uFG}b)VnnpjgIxSl_)lQ3DLI!}Zur6KY=sP)E{}mK%N4BN9)(si_W|Rw(Lp9jm z9xF1p1e5DxPIfOF7`cG@wP^)g%CuCyWJaLf>2~{Mw#Ub_xk@*lt#fU`8em(g{FP7PA1Nd+9QzV}(`%T~5!xd@-D0+kpJ+hi$lW{?d50h9Xo zvt)VJK1syIjr5F!g;_zZk8UZl0f>-DL4jtsKev26|4kITPYNUqpDUH|IvF5_X90^L z_OOBesrhF&mi@uYwmpOCOiS6hqF3|WFTM5az2_5v$Jv>>)``2Sl@B~jyaMBG!ha@o zf6AY{!f@0YXnMHt@o$V|!e0ac_4b_16lw&xFB$6<9l*3v)?JIcvu%Phd$S z?R-&=5&G0{;o%aL)REHe1OgPNVG((tZDHNs1JPIGbOKZe)ye;moQHTpr+p;W&YdXu zG}AK4+tQ$$0YL7$DC}qmFTfvIngv&_%{8o-$#G?H?VP>-1&u$`3hG76xA6z7ZV!3PS#}l zg;8{S?=BRpZU}s_nT_eo5HDTHTF%!>zvqsCA#-A1hLlFpijna>M@s|;QvaP(hVOIg zK<4OmB1a$xT;!{%%C!~l^p7RY#y6x3kjX+|ZN^LoF&rVz))wQ+ADGLkUOr{=DxlmX zf&!LnTPk|n9n*I@0vSWTGV0WLP;jsfD!thWUZHPi0f{94&Qr<9k@BROwSWfkMI@GY z?{~0dyz;CSI%#+}h;X2`4k2NC5h;&Oe?H8ioSS#~b`nV z?lt7UqZf(RIvU5nOTZRRo2OUf1iy&trK#%O^0~@g7rkcs2-xWB_91<_)gwt^dsKdf z67_-zsRB7YFr$Qn2FTiy@v~vGYTv3!V@fSFRs;;dc-jNjC!k++?>q zRLH#gf&JI~w&VUovOiUff37y?@Sy`y8E)=9yAeY3DA&?$0=?E-aEd!ZL2^ddXmhc- zqOST{M^&?VIJ(yU4DcnrywvI}!}^7OgUc{khxwE25{#qgGHBaQf6ZKvSW|xYk!i3S>J#7* zwnpTveVDm2PVdk~W=mT3>ICFeoPWXlVDzH>HV_7QD8yITSy2?pz&sNa6@OD(e4y zE=7qg;Wmd@@_&fQ*iyK4)Cl89-3|pxm>b{0b;pKLyV2mu9d*%TFGd|3OENbbAlIer zH^;FCmvir!J>{gZF8Gd{?uFPed0s~sDt(#hfWn_K zK33E*TAx{Dbe?5+SnBU|%4&6Q18tBu;Y-V9eTh!*#-cdobc!7VbBWu6gX-#*AWoAw zoGp9!3)N=UupBYh9mT>|x01Ik=AVVzV4;b=*_J&TuEy&bNyaCke83ATE|>E?zq8qv zkuaHM6RR7B`bA_Sg6pcPylgm&xhF%Rf6}Y}L{`nYDDm{ZJKZdTh81M9-39+M=yAVC zwL1is0rmZLFqeWAVG+6;6D1R*J<0+bF1*9xb={1L|AIx>%=1K};dfTUKG`_-I0jzp zO4l~`*C;gmA55t+3n32iU8XR*L57K5-)GR@Ia~{MR5Pf%btxS;a7q#LK-h*$pL*4F zB_+E1PPA(XQ(D+QT3I_OuS4&9$nMWvIiZ>hu};(_5ArJh%bOMK`(gnbwWEd)q=SWG zF{X!cj$|-ODAns);WDGD@4Y+E+K8)anuq;j-STU2f>=XJY7JAPCx5s=|L;}(%_-Ey zFt!gGfj)Dt8F;jnJyM!2-as}bKPsnhAY{9Jq>tBD=`w@K)CD@<5m+yx2FQq-c02gJVDst3js;Z- zQLzxevrBpZDNl?%OvOka{iY;a&T+p3>1URB$twX1M-#}muP5(b8|Y0DADZ?0C?+Yc z75;6(N|D~%2GtJV`XfUckp?%9J#^Z2zZwZV_DXi`1@KWYcETu*t(i*vu&a-=7L>Al;cL4aUH%~Nmdp?J;`**ivz zFDeE_3{tFUbcVXBe`s92|Miv~G!`LL-#3P?i~f>D{j|pZHu~iq%5JH6kH~L7^3uaD zi4jlGq1uBt?!~_vWbAsBwDsi9&9@VF_~m@tn>*1|R5^kxhj&@8IA=zDmn*7PU=`^; zG2&`t`!l1I5Fra~Qm$~U6Chh|i$*6xCRi+dzopWB@Jsoyw%q<#9g{9&BOrM{KFWXj zCMQAk(U2EGU$WvnB`M|j=yk{lwFs%myztH$`CTg+lTIA)7gGq>Gy(vQO7+TR{EoLA zNG(hB{;@iLa<(bKDp%x9d1H$p99*Eo5#P-MtDSRD-lu8>=1tKh`FAXA;+9nz*3;^r zr7Ajp>#@^Kt{PNLENUx`{F3%SydJT7PmeAEtyOM%r1wrwBJe{YYp5(grPM68I;PDz z>Yb9q=S+|!#f#Tb8AUW!ZnB#QYh zSNgOn)VK7e|f|>Yi8pLTi~B~u17U%C%GcU zU6KxS7*UG?@3rN`<{Z6ZodUAOH#$y-3F^81rsjt|^DpZioy{P#j7(2WxvGKFX&7`l z!|9jjLI31tf=&+BIgUrA?T2^sBQ9{kaj$$thEPVf?s&B0Sc)FmY>dbggwR(iT*ZdY z{{?FQ@;-Rp=KcFqv#GP998=|o{(cq_U#M}!)lB;)S1N!|J_kX)KGqZs?~z>BikbEhLy_O~l?D+d;UD{x3i2M|p^fr2sBg-*bG zBlRJTVFhZi^)gS1)=vPU;8<6UOG17>7#jLZuL(Qz zfCQgXMq`naNZR+ul_({zC=M)?YfhN!3WfercO&4837nHaUi}iNVCSbMt_!ds)^y!h0C)r z4TPms9k%cUa)b2p*iY#!1TtfTDZW1Ff&$r*nF3deVx{1^b_H!@?L@czj3L`+m4<81l;HJnX@#T<{ zxmpLxSCazzjfKqYZgvjXg-@N7Y%?BdLo;5at1KU$U8Afh+aJ%PL}NlaP*VG<0ea50lXAJ@~J#aE1l{SC~=TFX`3Z>gE+6cZ!zdNHnE_?`uZs(G{pARB6 z;PRC4VpMK$1dj|pbrC4k%r+@a@iy}-R6zz^P~!{jP%Wf*^*tulCs$qWE?s%O4U zqw4^%5iKE@+~~1HcDrPlnv$cMd$_Y=Jf^JOt_TnG{}1ipjk^mu>ii!RLTbAwvDs@} 
zbJ;Q+7U1S;X0`n{FNJ42zsU=vv>zwi40C)}BL>U&?Ywu~Lk_N_9IGI3umH=6_j!!{ zg!4msd4j7QJkT3iN?3urJa(LTeM;?p*=J^D;bhn*XnlVEK7D0VGBYIH9N3&<6L%7o zo19vh%LhQGM`Qy{pzMCQt0iuJiN%sPr+%4+Pkk;RC#?K6Z+;G8q}E~Fak@go;s)jr z5Vv*d*XhTOm? z1Luc>OSZ~Ol>-*TQ>M;d9TDc+_%@nttmY0G_HCDI{pPLb+Nbo{=F>;z`|FaW6hv5gyvlfq$clEZj^%|K8W2$6|(P<|0Jrut%te(Ta=ib@bT~^kc zuz(%K-Hr=U!oSkljNsEhJh3gu{esQj2mSU-McPbCGDpBXp&>$kZ>r*8?DWSYjs+tr z;#Z!#Dy4eG0ah#)WlNJhNZc9wQ@4v&;aZz^T+MEmQ9soY#fg^lon!Dv-)m;|3DltY z^WnPzdhV~I-RQ_8F(IcTVC5BCMO?md*_};>$P>~=u~6$H6LgXA9#YJ8_o>s-D1Usk zd@O=HJ5E+?Bg6Z4A{I&e-%`@hbLV<6VZ{eYFeguW>|yj|%h?L&zDgqR*}ui`yJs{v zCrrAI{2KFB)_6x-e%O|qAhF#~)Q~1J_RIJGrygrFauw$Q%zD>2Ieylrg^6^fR!hJA z?IlN>uzAP|B?KIk{q2ftjJ(sifH8R=foeUdpZ~}hR5w&VPtHboxSfBPotz|OQhVd7 zJC>r1kN(lhoGMGWK%PEBohqQYap zs`rs`^=Gr?g|+bPV$y((Wk925c9-@Bw9aYLHefU7D&pqAE6CF6KvCxMyY!g_avH(> za*@F!8|Rv#DUO-6e2Gu$Gl5C}bi%Ly#I{A0*pm7}0v-chS_`iA;K%aGpeGN8Q?PcL z1e3KQ`;clr>~QbyedS4J8njv*)Vi-%s|SJz`VNFZh`mSUdcx$}-Rjz~(i}kMRS^^O z?FTQnF&|h=(&rcd-q>GbQ9VJ^g{0v5q?UZAW(J{Xoh7Y_R^$up0^msIG1eG>>sklpe z1d>(LfXXJ!){?ICXx`jsKG|=UcY{fq=H;n5#K5zs=0}3&P*Hu@7Oc(Vy?kkRMx9d@ zv)?%WR{9|{FE^g625>U&#CA5*<-z}1R0gUP;N+x&W-8ChZ$Rf{+v9m+;BEq)y_#Nz zSgmt7&VDoK-dQIgcb_%x8a>C;59oyMuFB9WcJ%+iESa6rHKD--T$HX@O9qQXR_=4q5O&Z+*2rL0g?BUkGpKES~0P+cP}*HPVDgU9XGYtWXnN)kWYkUeuD_Pe_04gdWQ;{9cIkFc7eLI)H{8NsG;sZ!hAB zn@Gk@`B+$*DQ@h}+E$=M+JmR|OfG{f!S^3QrR3F)$%fFa<~@XIjjhIN;&*3f^1dKL ze7u0A7olI1Qe)G{(>L$Tu9+0tNn_Ip54+|%lgmZy%;meS&w&g}Oa7TaaV4QCITIn7 zVS5*cmGjtC?{}QZRS-q;;eYJ7Ej+k!v*sc+nC<>HqD~5UNaDA%?hKf3(ZD6~OX&XL zmX#Ghq20PU84yVx!94!ctF_6p(m&sq6sE=dulay@jW&7juMoK@IxJ$>~sw`b%lfh<PO9XWQn{PFTJ`?zV)7gGs zmZNO(xizq{!B%1HOov+Ok}7)$eZCXlXrRF^#_V*>9n@_mj5 z8&Hqd7T-UZ{rmfm!C14`UN|J^u9%Y6VtRs}*fCtnft%&6xT(`;p2~><8Zx~GW{m&E zVMB%p`Gv(ffe9$P~vmqQnR(%0oSOCOZ7`Gs7VT)bfVX2{AI7E{z+d`y9wP2KYy(;Ql_N6Ze$ zq`X`zI~(eiq+4aKIu*PTcA1!;{!lPgl$XT%wtv4Zhh4VVFw^5Y(dcWg@cMS*iYv*r ziF)O_lq%08?F;_kdv+*a=7mHpd2ttFW8J=WCRNbufiT_AvlmVW&EoI?VjuZA>9d0> z{H=)K3bUrjLN(v}!!y(!A6_tc_Bf3Hgvz?FdyHI!(g$F#ybBn-&kk&qFq;kfks0n# zGSJ&KH8AtOB{Be4?ZcU{Op8}QzMqYF3vpgAn7uJaot})nw?^;vYAmfi zW4u+Mwv4Nr@l(yMA)cT}$lI>n5hDlvClzrIf2SV!rs&?H7{<5-Sk)DD{YI?{k0m4s zBuLVL(F#gO<4$Ibc;kH>6hb%W6pPo z*hh>q1Q^6d%YXA@J^#G+9m9~4g?SS1&RK2xHNWI888ml!jjtgN&8@|e*}CK~UU(cd zq)sqtH>mKq!6^H4_2t)_s~pLKD*r9CJ$Z>0_y6`z(|S-{=N&`S8v9xBOK(sELq$ql zJ;c9CPGFN7ea*9*s5-n#FMqE4VTT}l#N?gFz_VDSbq&=o$>nTOVchxCc*ryBsq|;v zNqEplba&ug`fyrS^@^v!TvTRL04F)qVuF)zE=9;#&i?QH&C?GhG6nR*xw?~ep4Z+K z*vku(8IKZ@!tNnF-0WVaGuo{ zTi3vQL!To&9GB|8hIwCFzwAQy>V+?2A>Gd1@#lIQ;n3r{s7;bJ!1xJ31reO1Hieh9 zZu&a6^adf$!~tEZSXV!?PyDPPxizNp1Cf?O>&VwGeU)BMz}&d?_DBxa?!(vT&BTw3 zT5j&O(Tc{l%4!%lA}!zq8@T_o3Cj@iZjrpM44a3twv{EhbG~_DcZTW`+0l&EwjgbU zi1bFYiUnYgnmo4bpVqI0*^&qLj0QBZ^aCk@0~#{&Q1>cfhIX_ER;;b{Y|hCQd3v@# zTZ8FpUXcZQev>n0z8zMJRQn^|)_y!?+dO9$OkuzM+D6duar~Fx#M0kM12=k~vTwoO zc5~sOXg$`zb%oC_PDD;SXm8}acPHKbl02yO6@7#HF+2A&3mpMo6aIFSFqr0tE`!*n zHP=67{Wb)x^+R;ifm4Tc8{!(9%8r%t*LCio=-+Zq0ZhhDs2`w!vT+aeCBGjw&Ch~1 zM6cieTRm;p56ues7K&`Xaph=V+p{d`*bzJAf^@L^U6D5~I`t7$n8XOU-06fab_DN* zf6n-=@j6|V?ZGbDab7fz*%?=gVe8hszqJ#>L6Nnej=uh%#bLMU%n{%%o+?5x=B3(VZIGLXxeGcQb*VkHq~aFMZDsHfO@V6k`(CB0Cv6*4 z27;20AHFl8=CfY9OftsHW|iJadh)|4BqA{{-_`n9|2f$I3(CHsrtUit(_4U%N_htB zUDGDzP(O3{`T17qxs^cQ_tW*10v)EPvHU=)HqJ?Ib1n$#d+T$Uz~v1L2RDP&W!^)J zTFH-Zep=d~B?CleQ}u*aL~c;9DqPs2?K~f^=D8##Y$8}3BEtG<|L_OW&Z6DX*M(Wt)-sET$sbJHJp?uH zHou%PFHzcDbZ1<}V{6c9DImPw*=OSpRdplB_s!Kgf6bW;a#i%wBUv@M@%{pNHG8&M z=|&R!sRe)XZgW?^)c1*9>&N*usMAyh*?-6^8zufb8t*ZX5X=1kSpX-O)0Wz*jP-L* zwnho~o9smeJHGh7?NHJ!t+{EO>AQX<{hHuhL#D>sbqp%Cu9t{EWtK*Cd$-efahH>REFq7aD_(^7W`3* 
zl-KDvA1|{BWY@aB*#YvwX7Dd9m`uuKg@$Tv_nx}>^u<2P2=uU%|2w`q2lNZY!Z-a_ zAsm8uyD?zMjwa4^)U{8&Dr6=aj88cPacg$Xfjq$RRbqN*+|RqxyCX9IO+DfqRiK7k zd0wltq1s~f?C}UhR4)cz5Ax#QD*-WgDnF#WC(@4~JzRDNA5ZLOq{bicWdpzm@ZQr^ zy$3C%AIS0ArO>Bmstb+TTkr>hKQ>X3coEqlGV&is7Aa*&IFGeGJ8>7UKHc%?dv?As z)DYhorTUoMpM2|EchD`=%$cjSa^X0NfBUC`OA z-Jf}K#zl&7rtINosJ==MC|+t9DCaBzmXy4{JX!H7+PV`nvp0!?>f?^TtR9Bw#yTt@`e99lG5pbYry7olJCm~ zzOn^)K1j#rMlG8lN!(RwQdI>aj>q?*9vD+(P~fBNa&x3YSWy@WSql=$%KYtG_j9sZ zlRN)clwYjKQ_-U7+stB~fBh;BkIy+kJ?<62m!v`18hH!`oeoW(?oHagHg0e^AlpL~ zv<#VzodYTh_TKx)I|A&Uq?(H@5H*OTgpVxeH>$ste$Hoq*`g58{2YEmKW zkj(&JwjXmD`MJM8aTQOFv5!ah4Jvc6Pqr#tJnr`{d0(>O`&@!6Rq4Ui*1PEg!u2lq zuG=lw^*98mom{nX7+xNXA&;qN88b_xng1v$JS-~Rks2QJILd~q{3%iS@7PdSXedZ8 z8edF(D?vwCiIsV14=m#tvXYVhA=(Wk{=oTf{P|C(y4v0PJ<&+YO$2gZ@LNB>RNG7Y za9$d?j`%kd?{X!QXdITiko)VCj98Jkl){e$%Jpof&F7E#Y(=RMMdVGePrtx;GvDBgGc3 zk%DYFzK!-3G$U`u3%{wLEGzXAH5U`~zL#-3T-<~NcRzQ9`IrS(-a%Ut5)}HD?QqBw zAMl%sg!~(8ozb5D$}U(_3oy%O`w#wr;@dx%L%>E&WHS?g;x3J=FIkV#QgRn^s7e#sgO+AL~DV-h;0wSyIqOI#lm#@)rHzGe)X_+#j!)v#3YQ3nD zGuU{CkwYwjWjNw|)4NhjG@|xoy6Dq0TEt264)P=IIs(Jn1G`98Euc0LTx;)sL_F65 z*O7!ha0db65hT0GvZ0KNTxBbe=mStdorOu4w%O0Xd?nqu!W9vhru4OsMgJy4D zrGJepr&?fO(iK;)QJpTq!ARzchfH18RuE(cNX=k0?^E(zujS32M5|)#y}URJ9e)f1 zz0JqoHIXFC{i%Rtbi)Ke4!nmi1fFz@>?8jTEfV?8zzyQGEwgT?yOjd63wWY%VW(_= z1KdC^0vox5J8k9Q$LAlQ(jgJBkn0M{-<0{J`DikACgHTKe|;Ebt;9qcRq?HlhJ-lr z_GL?sC0Q)k-|7MXP5ji0wTKeAL;d6)tXrY68+ydjt!IJ%O6?r<4S8C;@>bpa%}=b; ze{JJiSBb;O6{duR3Wf7);BRuvJ|um5s0mk(3zMv;85KHmXXD0&}=g|;W4%92Jo-LMx>-m4C zaHw5ZknPMbmVTUc-M&xhzkO^dbXJq(L|VA9a|X%v06B3$1$bwa0Uk#pj{mFzI&CkZtN(4RFCeeA#;nX{P<-_|%+)F415 z_1z~2c@KX+KoGfIkBzo|uT0P?IU84W!9q5->PHyhP61`OLNitF|Elfg^u*29P(7ev zKzu9_6$8=*PH3`!>iR9WxX%e!+hzdOvWt<`7BR4Sti{!z@*h8EV9zdMko6<7|L-@; zKGVgwxDlCS547t{+VenG@G8L&jVZRsyg(L#?3*LJub)wZ?O3l`h?$ndR|4G>a3R55mbwwS>dDjwVrz=hgFQa{^#2j?@_z{J`L-WZ)a_VCe zZaWFPcQPz+;3!El`x|Ej{U(Y|HqR`%Aul%+{y2*(I!dpMopZGas{cOlE1%=&I>LA4-sj*^^_cc#R51@aT*9aT0~ z@jLmpH+DIFepWYzDKPBbY;y=1>Bancn3wC;4mYaZd5L)Ofp2gID=lJAQfGZO$7@fnNzd(w8G9hpTSKRRh^gb3;Kg zz%$-wr+MnLaS-JfN?eCWh4N2#)?o$6pr&P9;(mCfo3zO#!aabkbkCmiq6xe!skcf) zaoC-lmP_YC0saV8{b9Tmy*t0n9Dt#S;5m@KEiY0Kmyv+E31w|uZeYkC z@jro~&)MTVpAP+GNQtP>d`sLVIdMmf*g%bF=En8M7$RBF6ie2uXD5#?rq5i%o`8r= zq_|#jM-8}J!wQwZc=dr$5=7O2!&ukzKo-*cR=5Z0Ik78!UjY_Gi~$+Vw#^+cWY!S! z21?mEZ)}!jq7Qd}R*bL#i_(MFnOvUP3Vi%i(M!aA1503HdF%R_$F@OES?BVkk4H?p z660VsNs#4!mg7=snw?e)2k9hPWR0{G{*t z5Dp;~Yp)l?VN!?x(PC7rnXED*xNid6t^w6!Rk=I=;#D75_XKh7j-x|wS-k|Az-Miu zo&TeQEe3VKq!Ph*0d_AnrHSiE^2QUfUwDEXQ{I=i*JGcxPvNjzHFe!rKH*#`rJV5T ze|DJoMMi!J1Ye!fx6sQlQTRlJ%PcP~h8U-bijU%{J0M`Hz8nDR+d_+_-_}$5B zBpE1RL9QquT_BMUd~<@NIr>(4vWbbztV5u z_V6Ol@{tK{TS&ZIHxxVpNIP2Kt5jq$Xb~Ov@8d_vd~Ki-q@$LvTtMEV*5-mY%3^BU8zhDkVZA{2s{c94(%m!b+KYN} zq*CEJ(UCNNu(gfsyoT(@vV)U@mr^}r(!Mw3%Ok&oyG)kgu?UFagI+{9%XeNG?@}Yc z)(*Mki+rrfGxt~ft9+^05qzyC)U%zS}63-YYzss&*gNv7?{-r86X0gun!>!X_CzA4br8BwtlRC8XOP= zH})3B9*)VCzE?ps-GHP%=+0(V$(pQC?a_4BS^x z%B|3=ERLWwC?3(YOS=*=D?EME>DQ--;rF-O&xJ1)hzBl%m=(VHdcmi@=q=^H6BeVv zIe)v%QtheyEo9~G@Q|JSS|=1boVj2HK^Pj%{Hm&5OZQz+Tho1E!9v2Au4TYYmn!J}IC#442$fk+^DmoWXwFauge`^<=Ug!d;8yafxzw9Kab|5|MPc$i6qDAwl-|@h)+J(&h2gVJGCH6x}YnF)A(qt z(7c+^Q|*5);|+JU8?>_L7V0{8m&o_PU${;9 z4K)lf-8iC?Q!*C{Bo{_Xj2^$|mNt=HJIyaOq8<;5h?0WpLB^(tjjkP5c%TU$IK%pH zAptBsIDZI+qrgQJana`hxjXFuCl>{lp@HTxNX!Q%fVW^6+uZ{rAg*~{YiLQt=o8OJ zNc$tC6$t4GY2zB6_eBgVD^ul>B;jy60}9lG20w_Aq`mg!yi{r0$SJ&HwNuoTM9sMz zsoE(03|DYr*n`soF

LnrnwVU`&y)JkX9iakV9u?@St+fI)F2Q`^%6q9Me-V?W(f zCMZ2#=l09m*FzK*o!De$lYr-g=!M{JKDGaPtiC5$VBU1rY(8mc3PF2)8=&A#eH169 z$UpMQmq{`7j+(YJB~UFzMiAtD)_wNVbv?T3-6}O;s7Fk%7xdU+$pomwsEd^2`45p4z!yF+!TC6T^?# z8Uae1^#tTbj>_L8mmBR*FTj6?B+?=I(c@}evClZnm=6^23IoO<1rKUyevMVcp|8rF z`tS?{eJV_jc32FuCo%)UA9j9>%m!J(f6M(wR)Y~R`~pTKt22QjCMONA27%>K92Eo- zNE#|)x_9b$iOIR|HhSa{1eRs7AdIHu`;>bDL?5I6?Jpb1G{t@JC*U8T218y4^;t&9 zvE-e=Gd^0~`H{*IOB-3jLhs03I`<@?JLmg|AqHmkwKV6f*hLh~#6BjL&V!~G=Q?no zO{630E1d3=E__>2`CyGN=DQ(I^8Kndn6qY=Tj<&kSF!)TWMA736q=_zu0_w(ks$?Lcpqza~w$jjy%Kf5yt><)0JfKQcSh+ z3ic(ueO2}dcKUA6bFT0T8L_a1WKCSug;^5oC>&n(Mpu3<*4nQj7!I*)2W@!%tEO^a z?A{_bY}k)ABR)IG8_WZVA+UC!0mpt?e0(?p+-rzEiy)LBZGs8tt8V2_WDI`@h{gO< z(G}Zz7o-_^}`P#t2Lru>h?Sgau=+R?pci8 zM&in{-CHC2iuA_nznl!W)@`Bp1*+!hS>KA{8&;ws2KJiWZw5;J~gI^v#DVAr~^Jmr0bH({7xXh}tR@+Dwq`L$k-(8C7g zH}Ft(yp7X<&~sI3eN1sWoCEIRawEP;NsVzf>={CUHDt^6vo+<}-SdA)x9Eg_=6FO$ z$EZ9q1uOx?#}i+1dLj#PHPbiBbdfZm22q5EC_viVJ_?LOVub6{S57Wsk*L1_v*c6m z$CWlg$CFzlO(Hz+PG093p0YORz1{F<`#2hL$n+sb>`^tk)IMjr*%f z)*Q#1`jq_m#9*;v9R-orU|oq@wFgID{~XN*Wsg6G?k<)Z zi9le;q1uEN5aEW=-em@C?nn8v+TWtM0m=p#J;=%bqYZhPA}eWH zshJWx;qh`9#NX!+m^kc*Rp7lu^AfBNfolKnb>}(?uMQEqQLt#F0+4Nsr@Qd~JzczS zcXk*f&nvN?$SrWmq3YXjy%*m}|4#c(?*-LBQiTFNN$jK`Y+HE-g|bKz;eM(ocR*-Sl=Er01G_Rw2iLZ z`*4P(069ocKQ8-#-yc1V2(*%%dk>{wD?Hv`MG{kWET_=tM3E!?#VrCNALoAt>%G0_ zo)2J}#=vKIk?ex`#d<9~z6g4u_7XuH_@Y$nmGcp@Cfb&b4wr9skBS>T4p#m0g3jkm zrQkZ)ymly1>t70TqLGqdGAAkujNEb}q{ixv=mm_kvi}raMaD7$R!~{OI@@e9ts4ds z`mDDh*i}rBc)pEX(W<$9sEEJ7x$cPEL^gxXz~ai(7U3Axd&VleG@=!IGO|890Xk`~4Q~&w!bgs5iK2;h_I$!OO7u`HSfvZFb_9P-M268S~$1L&UG@52mTQ zar=AT_nyo1)ylxp^!Z>r^j+HbXtv4=8d)h1A{AQ3z0B7Vld1LM=B@&HQXlw&!jV#m zy_r%8i)pmar@~9j{V{0&YkZ`PS!w#WayV;si^bU~gSu_$h~-~O2YSr@qoEm`hcDah zxHJO0XcjR+Ne=2Hfr9UC##-j_uii;^X0yGc6u{PYLo0r`mTQ+B|JQ&rx)rqZ(&mq1 z_V--XaMA~MVj*cX{9~UM(=c%;QL8l!>?#x&c(Si-Q>WcZ4nyl2?ZvX-{|W&nwC&;K zkg6Yofk_;s;Cp9U&ZAtC>0EYC?58J`Z%Y#LO4I_!im#KHV5IJcb5}dXg7*3FMJ{$L zi+_sauskURtVA{7^c|6fkjE%7DMs+A%^6sQo`!!pRlf7vk>}?3T$SsPzlp;haY!P| z;$LNvZHax4IdU?x01$HQbdkzVOYR!`GmtUhQjg@UMC-e)szm2A2)N4VU<#%HePKk6 z=X6^bf*k=teGP0_LB>AS$?qMBguXKVMRn64?GA9=L?K8Q;54#xmBH;QO zI04i?GQcpgY_BvLxq*zNY36F`fobtrzZhGSlmQGdM;TgNubh7nTn|p}4Z%-0lAVa% z3asu{aAei#;UiMZ8v0zW?XPgoUHhfn=`sj#ffH-P$83$@D_F(f@c!vy5=`R9GtuU7 zTV|wlP0IBp-nLaKTTPc^(C>TocSRAE_@fn?(jlGp#BEKMiM57;JmG4M&bpw0R%x7T zlV#qMSehzE5wx+Qhvlm5`~RcqO5>sWqkfwxm87zp7D7T1vNV&B$dZuV$eO(@N#T-E zvSd%vkPu=*_LwY@t;xPO82dV7o#oEG&;382=Xo=)?!3A8_dDl&mvc_ENcTsY`Y^3% zbNk)4M)2E*M3T~>8)Zj++{zngK4N@!3z|J+3j9kJv<<`>QHE&4Up5Q$+O+9QA~wUc z^(|#GZYxZeo3xEz_OQm5;dp6%D^fv1clCpE&>~BwIR_+yU!eS@I}K0*#<4N1KDRE- z_O+yEjhbIuBUlpWdHqLrxMtMh{(o1OX(g9zsyFdv#CmjxHJD>a!+a+;@oG8QLT~#1 zHd?{|Xled$9D>E6q9=>5>1orOf6&+m7!I+$cR#I&Q)4?mbVQtnIpK29>_%DNwZF$O zB3n+NnecToCY6e2RW0I03icRB*7V_?G8)>j_ka1^Oiw}PM3en80YSnE+(17<%biQ( zdZ(-78NWi>jC%R{nK#)36GyzNlHrr&f@#3D@^W%d+Q4foNlCnO-m(-@ylf$!g6RYX z{Nnl#0lrma2Lu!x6l+j2$$8awA>_zy7w9_M>=3Wax8#B9mhTt)_GL(9 zmb_}Akv$MT^utgR$EEi#UdrSj#*QU5aEQ{Lnd_S(%C_w0e(XTD9FZuru>0C%v?yFa z^zosKF8kj_MB8q?YAnC^Q;CZeYCypObJ~y8+Ggu{DBZy1oddCK9oP3vSV|lTj#+>5 zmi<*P`=PDeN;NHY)vux=>QbWeO(9SB;i`=MEO|VA@9&){yH6ps!Rg+tqrd}gU@8jk zGYq2NrzGi|>}jF-Z-&ZWc=hGh_3z!7x1Sv_`#*JB5506>#u~WpMH2UmABGPo{w6z{ z=BV(yMT)^3R*c)8)ppM#NZSbsZ>?T_dGTcI1u&!Cfe9AaX$}T7bUC^tdOtB2uu}_; z3mUs$ULLrs(rpyBAbWB-XRB#9qJYb3T=i`N#L&+z!?|?L9cs>odQXGxxDuoKhR;f8 zBJdwS*VaR2d;q-fMG>%RbygKR5nZ+iL&3rgUsVA*{`0fU;MV7_pIpoqp?QHrBFtch z_b)D0%FrhE$fxQ2@!QmydiuHy+Ag7udWCwH+9u!zj<*K1Nh|a^L|4qdeER@<_XfR| zjJHqy65>G~;uQe;=7=Q=I%6M`VxeF0D&V=XN&%&Vv5-=HkP1f@{!&ga22Q&3wcA|bfK>g 
zeZX`kR?=%OE!S;h>1>!Yo!^kedv@aN5LtEe?924!I`3J+vlF68@*z1UpMR8oa?bjF z%O~@C@Lyg~WxVI`blhg_550=-w;XkT)~510kti-~7qj4t1|pR=X^ZWcxne)FXAyX* z2=5Cg{cMq~wMR*5Nm8Io`fJP`AE}?Ti1)jSXTIF8J2!lj_pQfKQmA$?SY3H~=!OA! zC-^WWmi9%#5;BL~gYFe6BoQ)HGTzDfHH<-8xSDz$oQ0~|4gNu0pT9{;)uh z47J8wW?J2e?%l4iHMIC0APz`1i~x(;{vF?myQ_pNioTHnRY1k096Es6BIGjlAo{mo zfgw?K`*m=L3-sn*273K3z(wtzP=|ZlM=gV@zZ*uW$>jmAcAgQGfi`S}Y_;AwcBWB` z0QD$y#ZNoK;h{=h1rhmEU;ju;3DFtnH=!6J4Qs^{w@HZ`cj5^OfZhFXbHpuP!8m5d z7Pfv!fEW4$4t*~*4B)Ew<;7Wni_3Tx$di27Nwy1D;QGef9jq51&B_p(7(>Ff#iVsc zKrG=&m=~N8?6Y_$jsQMYbZ;8(ui$YN{u0G?;&Mf5-$k)Iz&!Vz#1Ajnoo^n{36O=p z?(gOfM98z6Vk4zko5A#O{12#vr|cj$Cnt`-rr+yq&Axlzm8EsmCc&hSqIQx+#cNU# zf)=K60qXsyDK>Q<6^_@Bi=4mnvBtFiSzP4!F;w9n)82>whJK4h9JzHo4|DaFMONUxqN~3;QOm8SRu+|w?zUZKrpZ=C22uy0Vn7(&h?6x@GjMOu%DYxJt{kqBWN5ZV-_nX5p_c(DOcHx++RHZIqfWm#|+(K8B zfe&_GC~FBT#%HXgbNg4x6IMo*7E*)xzx7zh{Ur#DcrT4v0>da;Yr_j>w+E2Qu)4@Q1a&|`aPEKI3sK6 zOg<=yr#Nm*e=QS;RBdp}(j`0Cj6US#c{zDUEBld>ps43BQq(U(i_r<2?H*kEoJxf& zJ`y>G4e4<73mS_c*FSB;m_OpYwNQlA{@yF%JN(G# z^{lhrrnwhi;2{5&o#sGYPe%B+DqNiR$7cuFRnOk?BLn!MszeAt;R9(Qf9iE_f zE`7`O!B3dTUtHkHV19UDRI4c{f;v=x2W|)i^*yTj339LndHn-6m`vrxj!lmR2DK>j zy$K-8wiZYES1_}}rjVmwc)O3o974g{n9MfPm7LSu;|cWQoCH$1(Us}&UOtP80_DI9 zaq?n~s6*Db{O@wn6=5ymHW8n7xtuK zv+b86R`|=cZi|Bkp97UMCpo930egX?*}tW>Yi3Zv(Qm?ihA`yybhsa<$c4PV9Ktqr z%;UB9N#~@*LjJA*HArNo@5ddWzg+TmnAs>1LBoclMB8QAc#V2MHDm?LZN}u8O*g|K4y)V0=+K^E%xojU9+4FdoIQ z%eXW#I5;`Wr&Vd3d!2KRYQp{mfpadmUWDHJVZh6`RLfU4h#07$=Rvw>t84Ba z&5giW9B6uV%xvOl?O$5am69i9!?qJDM^0WnbC|HXPP_a!4-Ch!t9a6GylwXsq?NXx z)H*mcylpg_^^wL-^1OdAK4(8+nLOI7omw*$MR?&%srh=oo^seUy2j16H^A@Xm*Km= zSM<$f-=TLzPn-cOCtv_fC+jx>OFH z`aq7}+%_zTR=U1Tg6>=#>06V6yq1r_Ke4oiuWo4MVOWq=#Kw(m(TOc4Fju1qTK^XD zb2m@rQ1i-9imD&DEF%8M)CRLPbuo9ryxt9efRSlXHR6+CVyoKVepr;EA~35IwEQUE z8!LX%-ncMmSUfS5@1IJ-@qK2e@ZV7!#*XENw>d~fqVJ!?drKxa?s%V^Hw){A`o^y# z=dRZB>mqjH9JGj~ty5BL_$SMox!@`6RRT?lycc_fpbBf9P_nw9G0(-ME3{!V3N=k_ z@b7REXm9Fj9~Vp~K-wz5b|g!hCPfF^vZZ_bugEdBo`7uHk@BtBQzgpbFK}q)BrRTK zA&WWJakxafVa1jf(^Ynoh+@$APNkHDohRepXxu&1NTROO#G$K2qnF+Zpr&TST&E48 z_M%ZP#$8Pv5$q>J8apB6O+F{K zXiz)N8Begkp|BObbG~8mX$!kdq16E4gkI(sFN1H!*TM>NO#+YDx1gSB9zRCZ8+jN# zHnP%2#><~jlcBP;zijS0Mq6+gv!8Y;cuAm|omFz3c7Q2(YTCJe-GQ?Kuf17#Iwxx4 z0L6mw5}@>~-2()sbdP(J@hg4@sc|4P>V6J}_%dHm)Q|Ewe*XckYHyRvG$opSXym4C;<_T;aS%QHgwJG%bdhs zY#;@zaBtRi+8I`5UJY3R>Y|j9k?`8^AWfC(fTN6&c>&oVAN{L}%pq!vSs6!=67=S1 zO?`;+Fj18cxZ*Y@!oKhW>9c(6+jznM7s{P~2g7oYTR-^gGj}62OG{T-eH|%#Y$oiN z*#>sWH8y{)y4vO@7T;)f&XHTTq+suZy445KBXgD;3Hpl{!?oSAEn>|I#*6yY%dUyj zc=&ba_FURJ@VXvFDKOGGBG`WPeCI(e;RUCSC~#PYs(vL~9!OH%P_int z7W-D4&2J#VYnaPPiP`f=d25lDEEiQ0pP+Hq)HIw((_Lif1qsY|D>fzE-Qb;bt#zX- z!4&wF(l+NE<)A=c=&d}rQ%yN8Qo5P=vS-K8`|iMAlAd3LG??kqoNwW5{~+=4ihg@V z=*J*o03jZs;gxZod?=G)H7<=FUyOg91HbaVHDDHU8+gWei3c-+M_a~?uV@Vg{;L6O z-UN3%d9*+}a{J=pAk*%V+~6Gn8M>4apLz1P@45ra`XHgjklHUt)v~dh;$^w^9KzD; z&^(0Y^-Ev&WGO1utdJx-{%#v}cLGS@4?xzER1GoLF)#^EP@+5i92F8;O{f&U#og60 zx4%Tj3EUWpZBP7tq4R;wJoP$9A3C!uoU`^-JwM9IS3UC(woMqb!&SJ!`I~0>fTr{F zTVr8?`LBqg*=)L0xAoZ(sw`a_H@;!#-!6dbl~irRULmxY8JO}yJcPnuyK)W^5PQ&u zF_%@_ymdj%T%@n_--Z2S+3+i_gUXAjV9n2DxD$o|E!4{s_SejBYe(7y-9I?0t?15w zzrQtAe%^xYy(mx4-+Ij&zERSamN;bp$pr_W@g9kfo!Lj&ZjCgY9}Tfy4?X=}g?s<# z_PPL&uWO7NA5g?>$Foqa0kb%U6jK0j^5>evyb~$h@Xn{d!WhGtKh)jc5XE(tK2w(b zwo;DslGf3A!`DJ1j(&6Bob{81iX){x z>n97;S$C0epajoHMR|XI9MeS%g7?miAm5_hvABd_&!>*ye@sE0$kk{+)8Wdbxd(i(e@ z$=P?$^#I;=Rqb!e1kFqT%4JUxl4)0SgR$)u`15JcoKLTTE10h$SA8b0kC;738)caB zvbnu1g0{6UJO?ke^ns4C1a~lV*_gjD=H#7nLqDd`C%rFtS$iXQkY$Su5qAH9i@UQT z$lZQEGCbqb4R>!F)6l*Q9<}_$xOnx2ckW=#SNUVs@dUO4&ezXge-691-53=mcffu! 
zZNw;?6rS5^74&cXz4x_cs+}>*1N_e3>O?mILkX@CA2tRQ>DnmV#q&z>OaIjDEkz$J zS5Ud^91|`o|DJfa7h{Xx%C4U6UCmRoJPD59GHTZuK2k#E0&TDzo?xRH+rc9y5A`A4 zALTf!AR9h}mRuyY6Z8_I7qi0&YRcP*x%gZyZ}_JhOYM}q<#4FekEtTo`r{go82QUqO!svQU3*UvX^ zKB>?p`Is&{YK~f!nTHkYW(mzBwk{Yh(CmtgvBm6ELGxGY_)#Cn39g>i#NqPw3<-%7 z=nBcsO6p1^-W#$pc)^JU3jm;M)f8BT(aK9f5DSa3FJ4Jorf%36%Ps zh6oWr7yJiuz@&kwvHWf>=Hd=4Hm$K$Lxyj_&bVQs1-yoB#6qh7GT(~$j1&WD@W)jb zI=-#>U8`2m8sidteHiieSOujXs<*>jDT^k$gWG!bZS$%yW0s5r8PDf!^yUrHEa5$h z%gbBk7hTcb5-K&W_n zB?H?)QkVpUib^rmhZE1Jh#i)Jg&U)WPCxqF_V}WMp&&Nd;HmgbEG>u|!)-SG>M#4p~XL&2iz=aX>tzdOu$;bo$aPwUtFO{!EdgMz+g z8=DKQP=lw~f_OizstnU`+eW{sbNF{tC4K6siuU+DFaQ3$c+`M4C?2{}thE)7PNNm7 zTTD!1*)cZB&?)%!`sjoXgDz|q*gro`U|bX*BMJjb#^(regl&B4+V_kP~##-ByL-d1hfiWzJ@|@jHs{GF?YF}ETC3#&3kn#xoO57lIqvvVY zR()d$;>mL`DWv&vA~eA<|ND8heuzQ(Q4D#=4u3j*d1p@ow+d_# zUh;t+z+7R~rMNLB^MkHrTpZZWL4WhN@5q~K%3t<`g|TMD9pBfObnEFhNEK2CPT_X& zRnRKH(U3UVd8q zj5|2dhWq{W9o!Ud@a#c#ky7Cj zy8M&MLBhQUmX~E1TB86{WGD|$e+;*CG_c}aF;CRT?gVEoBqUfXJ3tYzm4dAVg z1!VQ$(Nc5c29_;rf+o>9Izn^`Qf)y_{u8-$9(0WNzj$mX@R|jVusjfRGQud?;?VGi z&8Q3-KCu&^HhOIQuA}E)FZH38wPNq5jDh7s{k?j@exa3vynEB&wW0XeFne$+Y=313 ze}{>*$j=@pebW=1+Q4$sAw4_ZFPqZRCHT-8=ORyIcD7tnd(3Pjafo@ciG* zrMpf|_Cg6hDFuulU(U->_JsE)_YHlqrlCxkZ0P&WFhg{OrD20CPBJOn9Z_H??Kcnh$R|Pp~>L+0_{6|U7 zu563|JlPC)gq-Q66AXZ`Mv^K%^Gx8+cqljCc5Pdh?-|be=Dx0CihAGtXAJ*%zoLJzTFH9iOt!%AoZj#CAvPLea3{I>{Ngl_%}+WO>EnSy=CA{U;>V>VBCebRzbn zB-tqge)Z{CgB!U%#q3Z8I7F3N(zzTzCF0IQ@|x$8Za9nYzYw#!HPrq9v?R;A$s(7a zvwZj~_Q|HkbgF%-;)PasD5yW5h&NbHlZJ)~mvS_nPlaa5)zmbYZ_a-Szf`qpn0}6{ zP!ruE){z!00|%3!&g$hq8@EgT4EFu<*&iu#&>f0xSa(kx$4~t`(I-i2c^pp!)!tq8 zl6c#O9o+vy?Qp+O<*<%N5m_opama?olfD(y|B!EdFWg14tDBgKFE0{sv(J&Uq!4wbxLir2N#t?9=6J!2^R;d6NbSH&aStIqI63v;?AA8!;oCH~^_W!W(0<^3B! z)Aj{`sA0n`Lupg%zK*o5eADTx3^ReAPxZlGU72QA#o=(sC0N4!#tnb9t=AuI;QmCG zFrXV~e6XNJXnd!9dvz~qspw?p<>vwKu^#brQJ;O1jOY;r(2TmO#k7FlEFNLLX8rfJ zt*G797H;>dWybN;S1JgXIT3P#j$y*uR5IzLM=9?PawphZ5b9lm_`D>d!$ zju7mCtNq?eD)=vdlsgOgCjJ=&16xt&Hs53L+jyIt3wu=l2sjo47NTpdB`SaCQxvLh?j9febP7?^$NgRI;K_e`C=-V8K2ql#eU`keSVgCQ zH5?+v+{!OFQzQM}lEetu8rUg(v52+Y_Haw=W1K|qo>+Jx{*ioz*WnT37cL{!hFH$)^QNZOXo=vL*BD9qZf<*I9(ySlugqQXN-?&@4Z6*_wa>!yJddH@CV~dO$sbFzE6eE7-{_3-w^79D(kx^@eM=# z&tzhP8RBe_f8~W_!_}yRpZYhjZ7$oH9*^mG(QmGm zfKEiZfLOA3>#hRiMJ=B@_0hVRI^^3vr*E?T*ypJI;D%3C<1f_^#LTYhJbUQ9q4|tTTMMu9t$iiOAVbsKMB8xh$x&fS z?4wv9pO_A0Tkx;2jPgdg|91~`h6NxxLk9?dAs zW^I$1)W242mg1i9np&M}IvmF{roC)PiH9nJ*C>s7K0||)f|s5fn6Y^YP3jsJ-xRM~ z^2u_Fw<1O0myV}3G`5A@F@@1{#T}J(-%&QUkb6Z(MH>9)Ze_ZGs><@_GbVqe^3BKL z0}obw5nb3Ea)17iTo>aIOYnsdRnHSDpCf9#0lnVvL_9+dUTPykZVqcZatcSz`Q z9@-sDIq59`1h2EPB-*a|Q*L2=c8@pYS-!|u(O8u*Ks)U23^|0qm;?&(eu##t%H~b(K-J&EmKdAx8(PlOi9ht z%(W*DjD{hO!%3Z(5<`m1pAQHZ_;+HO8L6T`_+r@RVn3zI<^t-3hWGrK~~@uo;*v4q75>6GpwLxsuO*E0~>kNUS z@A=>}UQpvu`j&Q56wOhjJAryd=Mr@M*sY6ssAloT3D#^itv(kJh02esPIb^~%v(tBvADX@2)q75JGm zYGawK-;O4ZctaYO=Z$(YcDV2%-Fpl5iEtm-9oZnY;9Ozz@o$v!V0(XZVa%o@?%j8} zn3Bmy^mQjtUE)p!ST#L=`TZ=R+8nYMV^YPS2um|EVtE@HMc%h?v6EIhAh&Bb_6zWc zxdunmkr+c9d^A2wxECshDyIyMayhqFf&r}|hswI)9`=Y&hSP*?OUNpZhZ(4b4(Z;; zuVux|W{y8#-iW$q(XC>)7{Q%l-UvGI)|l z`WR?xd-+I}5oV|kZLKfW(swR&|EqMR;1{p}l4Y9;ulcbILDX{mZ8MMy>EVbpTz$^s zxus3Y+`^0)(zM}!1Vz5D21|r60#j*p7p+3ByJJIlKD4G$_0a78sQsOed;_&n!;Xi5 zE5_tegsi6nU#$#Ybus`7Pf{%yiH{DQSm|>yxwg951Q{~06z?7W?`;fOI2IjGHuR5C zLD$z=L?|?r;-lZw4;ltz#n1ehDA?1+Iys5`X%-Su zp&E4YTo)VamN4#AiFO`4$d1+=%X^Uo@5N#jl4ESuQ8cFR=jlg%ZBbUjO=g>!%nti?L&JBf8I$fhD7WE)R^)0wfVeJTobbcB%>eK|jytL7Mki}?Z3 zP}Wv94`L#~Cu&h{v(zgT$(o~^V%$Om~dxw4lCug)*8eZK|W+>eob>$%i zRf7SvAqQp+6+cVJu&AIZx3$d@a*5wj!V*8%+IFQIY+_^&N|5`hrx9qDdX@i2D%|im 
z2yypSu&n<8sj%f&4Y1%U;o(1(dG9D%Vj6(*Q<|>ZPNdDOr^ctJp@5vKv|N8lQh)kr}yxlK7yAwBbO^mhoEz&owu`cF( z%-8YiVe2EODrVQtC?y}Q4Hq%dell^p_r)3r~ui5?XLKB$%wVYc4dg`TrLp5!viASn7)1- z-M>)+e8#7&w-Ca=|GTZDEcoC!Qi_Ry^^hO7%J5Gy&vyM$z}%b>)=`VLK`xN#1Gikj zi^;1pYVcpjDKE-^@fqk0`DSL{n90(1SY>HO_9NatyJj2D(3lJ7!~*Z;6JpT-fnMtjl4ms2Lfw+uE5$-m9ChN4+4vQ%yBZXKP5VyyI2kW(7QPOfJOVr z8p457>@UnOjk(B0_Ov$?@XMCDK0Ljqs_c5!#p`lm0df9ka@tfM7lOekO+t2cH zTJ_EsP#*nDKf=|@;mOgdedouGZWZ0M>ab6zn)<$6qx>SM6!dPr2n>MqB4dF$oRfAN z%5$F#VtJK{UF)U7dR9pCz^M$z{97-|jTzKCT?f5(;pB@-pRA3bUI39?jRESO016Px z+T-T8_+X~ka_Y{!7lu7prNQzy#=9O!N}M4!ukY~BlyI$9XjzW}NCUFS*7^p?6@#oD z$7zX6qmN{K5`PZ2hIpt8)hZosrC*lXHL__L->L%5tZKx=g6V62$dJujT>U5 zW&OMm^@a9#9%F%Z`A(d9){VpUp&@xIrNt4p!h3jQR<|tkQ7Si+%&CN5IAf zOPnezTN&}>?^V@1c(wTM+AJZTb|D*apsB-^7jFp{@nYptcPTnf-nfiTGqVl-fldKHXgUuv%r1ophs>^xd}m%Qs_b}3z{z$(ida9!EM_Vw`p%3 z@=MHx+;C(lj+X4KIw@|Hn{K86CfHUyV!VSTD7loz+_*McZOS><{=|ZK&UEW}T+6+XG8NMBz8CcPnrjlQ@>I0l$ok1Kqi3V+;& zBN#DW1vL3>+)iLDTn$7a9QMQtsxECf4$}-Fa>AB!oNNHqWpv&j4D=5|55x1!p*^2) z+OTsnH3h5Ikj4_)<{yxOrNxdh3=tB_`#^16Hn5V< z;<$|Af_T5;;$=cB=}3Oan~j?|Ur?55O=6ZXti`^#ypRzl;f^fIZtUXPMH~V+FHOUC;)C`lk z3=Q2nS}`*Pf=&G^9o0(NSZ6uU+|Ac#rkI=UGtp?TJi5yUbFq&|#Jw~WJO~tSRW9Zz zGL1y1at?0Wl9^|Z4HqUDqyCtLD$-1O49Lq0pizPsElc)Uw!ha~L3^mFS36ga&n5>j z@a^oPxz*MlXeo^#+%w5YKG_h;mM49*KmFJ?{3pc3z@8FwqfguE1;RtC-+fzKid%pe z>F)_k9~o(A$lAPcfswHpN4Qu0ptadPH{RS_vTYq`m#^~Dxvba zoBt=EIfFLE~HJYTxr;iJIQ>3wrf?t;smeAY}4j)V#4+x$-R zLNgv*6brGs%AsoXy$JB_2~XC=g@jotUUI$zsq2en@avdn*v z4E`^$E~tKqS!?rrzQ+DkqFlpv(HKNcYr`f_|FVM|bCFW;kZYJ#><($BX%fP`HT?>FW+~Q*Ro4 z;v&aN8daLAt+I$PiJG^AF5E7mn%ydGB`Jrrhdnym{c4a_H%B~$vW>yHLqFyaQLdPo z^R#oD@hb4WBcfUu;7FsiP6zHK=ZS`UX5OS{KfiT^^VAMxN$LGCpK;)1G+(lg-m;$T zIXW2mK9iz-I{(^ci`lc>#O9y3RLDiQE8|f(y7OD9Ak$SVeaFv7p$u9Qz`W_yHIT8}tb`sV>I4HSeBcAS0j zApHC3pg8Ecuupl>m?g-z@KC{b)No6A449Ygmhf1-06D?6CQ|{ykv}c2zc_X0+8D7V zK3l{$9j_ft+Q15=eyVk~mR zmzURvfC{0r0+N>>V}X}6M(5^$qf1lJQwIH~#^-V8cBD_#D%T3!SZ008}rX-NOa!5Mh zKA5ijHhC`Wc$Xg+Jvu$3lqQVAi8!4TDoE zfB7qn3v~^kCZcCfzq+Ej9&~{)wcHu61hux>xh;dU-d#7hGe!54TxQfJ_pj9!s4%|p zNls=|5Fehi-?O(hkMPpZeU;qMddc}krH0>`{a+fz1w$9|GYt@Re%-HZm8Poj%K+3t zFRw?x!)W4MF@sr?<0r(g?Rw}g8t-i0hKF|X=9cJzmZX8VgFSy;mb}kdA0ft}m@nnc zx4Mg20Z;Vfs)0_K=o< zYU4vCi^N^!dk2HOe;n3K7t(PLlT>VXNo>DUvLBx0g(r*ld?=w7xKr{iyVEEC%+0OQ z%sz$KFWtiZ!KQQf)9%h&Qe5jQ<7CxqA!MKs5FDbFaV7TAUzbVt@qwc(Jk^jm#zFg0wnj;Ew6r#_+H*I`0Vvzk^h9wf88iKZJW*cJIsQX z8t54xE+w!S*rn1<;rgU6_isx(@(wh`W~w?+&6HK^MOGZ2~dpp zhs%nrbXw+f$2_&;(41NLva(Q$(=xLA>%@K)?|nwE5(9Cu_3z%U^fq+0Fr(s$&we_E zOuSaU7~gRa_2+Aveum=|z?e!e=YLk{73mr+tRE0wUuKD|#me(x5DkKeDZ>VEY2(#IE=}IjdmcR&=d+ zKH6@tTW}W&d|1M@a)jlt^>AAsP_tu0J2n2njW)G4f5zD8e4LJ7C`Ivbp(8*p(J`)T z8YtmqJ+sK*USbC-NXBT`K5M`Cjaz`Ykv?KESlDxo&vE{(G~>~Y^WgIB+9fV&m3V_6 zBX-AVwOK`lZFsN!psRm)o3vX9wB7y0D*aKr634_Ci)Dtxtxpm7n%eW>3&BT%BkVhR1Jd z=-4wX2li(gU%W-kCwa|%^3i3PhH0FJam9`@qv450@3d%N^h67l43S} zB$2ng?g~Ac!rqfCY=7I(0$Mw?%>J*R4pN)v>f4vT#s2msZ`&G9owm0L#+V8E}+RVOr9GzQ?lzTQ)_Q-|Z{Q3EK zwKLSu+NPPnWcbfIS&v5Io8ACgNvYFDwmr;6H(37><}h92ZI{sPnH@w4uG7BoES4yL zM(x=o$V*G!QTVV}`_~;($eskJ*@W@z5%0ikG>5oxWR`jI}DfjP`NwyBM;7hfX0P zR*Z@M$A1KL`)cU3mxW|0f}A$M{#H0-9*FXGyKhLmSO0>*0YnDhg3mV;565bKS?HtE zZ%$V&@GvzUDVOR4T$ohv=qYR~X+=Du#dvmYg}rICH@y=eb84*1ZKNTQYV5ykfxF8!B+Sc1`+pi4NWKKX0L$*F$>20=6jfT8y`}%1vKaWz1sWNWd=|O zX1r3?wtG%(VHV35ah8kkJdQ3cHYZt_!f`_9y~#imaW(fMH|t8Mw7LsMgd&~hW4*9> z5Uirg2LJQIeRtf5hmwGb6Tyl>wE^XD+BWyAy(ybch&98H$?rq3!>kE^Jp-2J8I^4g z4YUH~DC+pO_agBQ%|RyR%Xv>x$BZ2eU1}n*$Hl7=mw)T&OA?YKOo=k^uer;1o{IS4 z-VM^iad>oj^Ect1E_0R;x1>owJt*qpDIfB2dh9f2h$dCa=lf5wA&as>8Xnoe=3S_s 
zevj!qHwdQp*xr=*EQ%&nL+_~wH>AX@5g^^Q{O&&svAIN^z@IP2I6pE5*{%&L${Rlm8rAyObos-o2x+LmF}aBA zkB8ItF+r^LrvnO~D2M(+4^yIz&HcNd3zgi_+M*Z?tX)b2*=~v~pYg!Fr!R&>1+T7f zG5%&<580xsE}3TFjYB#(-5I{kL%AQ%zDwq#Ny^dF^zZUxUgui(upj3OBW<63ykK~` zDwk{+eB>_5>W+WR^Zgh}LH_$Ys9fS3P`rWroX*cDh`wwCphuJUK3%{dMj86<4n2^@ zrnO#JEMbn{4{Q+nqVDB9-3m)*99$k*l&1?8(omGe>n2g`!Re{CQ`#sAPA^9eI1Z)z zCvRc{tHRl+>u^(dI$UdP1n|i2WL5*Y>-`7^$f|kY`Qrr=|KnvM4O;*soG4u zW?K|HOE9QVU~RJ+Sp3e7S?rJkXAx(w4Otfb%fmARa|!OI`NVPcTx5EFo0K49$f~Cm zd>p~|R2Qk62jitoz48HFDZY#S{>+orir2SnBZNtb%HabsI0GSCgW}Ru^o^&F&xpyz z(c|MPBriOQ2yzFdg%JZmEMcC`1ziCP+YEzTiUA(9=u7fn@8{5pxb?|Gg=4>uzxtp# z%uvre!Y=x)^iz*Z$_EX6nZS2veG}z>O{@H_o_>)=)75jt(PVz}_8llCbFPSB zP4mC(K8G&X&EtO@v79n=y-I&qXFtU77;hqM?ajXZIpo`nhXG5BwkJ!~YHOXrp$uYk z;+aURPM16!XZTSAZm`5n?!QdSPf|fVjn;~1Djdpj4{-H>|HMzfz;c{!PGuE&Kii%o zia{Dn<2&m~`(d+PZz@m(*pRUc#vmygcgr2Sv#9ulrSt7-&wMDMk5FcmwTwFOED%vE z5$1Vygpwb4&q>+t%tv9ow|J zbg+JqQMDX?&O?u80b~v#WLzvdwLQuk;fs##sQ9JgcI7NIT`N*6`jZ3aHpA*PGfgZO z&+EUW{V~g5c=550`z)+@?)5m`;nR%6JbEhWeMkWG_6zH)Nq_=SrJrQaf) z{pOt|>$$pDT%sWE^>uG!J@5}UZQqkQZ=PHmVVr}2{MHqq;i#zOoGiFnDxz2K@lSJ4 z%g*+Njx6z*ql4ldGxsGpDNt7_4vc2T8?j&+~!5&YSa#j>iD+9+G$i{G$58LW7h*B}$3mXm|IU-J-hza#L6Z zW;Nq(^Lbti`RQELI^9xYNxWV1R2g9U3kfxtZ40PwB!6|?4wHs7Q~&2hz0)Zs7=W&QYI_b_I)8bS0mmD(%+i6DX znlt6Hv!7Ih?kqPQg~{d7#=KnAmuAgM0yZ~}k(d2v`nYL4dM@@iM)gjt{ESMYvK{{g`CkHZJ6`JbVHhXtt^=7` z@U(aE1>n3<$ui6b{{)th8^63%0F=si9=<;2LQyKfZ>6(12a;G;|5MVHM?>}ge{ED~ zDwRa06-lXtC^J+D6|zM%AxZWvA2s7Zp|S5V!(f&>_x^6* z-^@8PXa1UV?!C|Ryx*_w9r*?6&5Z07C$8}q8lXMOtzPDo-AO_U#j z3^97vERjR&AU(Tf3Dtl;1^V)TLv^^XUE2X*7|Qofc{yC7LGhrURbF`M1DIy1_8ak6 z7j4_v{sMOJUv~92+|sen^M`}nXhNfZ#E>$Z*TYEAMVg6#D*vM(nrO_&hk+KZUVXf} zUTq*|a%^*_dsq?*$A5djOvp5E8|Jr ze6xP|`E^5Ep|>?J_DT5^|2NP|zO)?`@1*wob`?R99U6*?BLpck3R z_qdlF@RyfgbEfVt|10Y+<7Z=B(QQxg9+CDIeHTP#J;u2;Z}=F$UJ1U$+w(541#mAH z#nbo#Ln~06foC_koCd$Pw@FZ$g09;g;_aZS7ee^s2h6Ot#uY3hqe^*Sff`JSzFS$Y zx6>d%4-|~Ah$05jH%mMoa?I#k`Va9=%iA;Y3zyZ|qWxD$`)iOa8hO~8GE`UK_CdJC zDGb%^#U}ODi#@hRSONz`5&3qX3x^dDxzsFwsX6(;^A;Jm5Z@yW-l&fs%`XajDo3Xt_pl1@M-$iFVpg_(d$oc--1*gV zI?}g3GziAWBp<*~?rfE%aMIRh!&`4b5|9r|E~|0>(@7%ha&`>meg}n)t3kz{mSg7?gTnUx{BvrwP z#18=frzwDDC;(gKY+vWhFHDmxrI zmvFV}ce`OH17N#Sk&uK2WJEH;1$+D)L`r-XwZRWTF_8`m7r@A1Z=;qUoptRcb3g& z9c?DG($15pBx#-9&rh*^w8{)N3Lku!lUuM1F|4T9@scv%W)K#HB`&aLiCDib*GyJ! 
z#5RnhTz|#r;c;X5z)7atofc8pzApHoYdwo06nO#S9h~;{Go3NmtA8+f!5@C?`p~7Y z_SveaU!ABb>lv#l8k2YHTJO%<2Z!-$un3YZC6`x6-}aCzPIy%geOqSidwWXf?2zsP zbz$;(NWyZqPN@RM-JV1~PX{!b9Z5jWu)Dg^&MIr+^up6YVLn zAsX{6hZ!AvSS+OI)RGbC%0Mq<++#W~(nSB-FW}M@J>T%u`AhYq13GxrX>t#znfw_M zkbLM^rG+yb*~p3b0RM2(l}JXA#~WvF6iD+qf+ipW-XsZEtn1zB2QSUKD%#?&)aBfq zaYY`@Go5R3ionN3d7Y90au~LTyKUt^-qmGHvGLkFSAkp+OIo-$_C`+f!k=%L$1TPQ zdmDd8m0HdiXL?A*(TAF{gWO}Ss+vcGQ2GI5v3g&wPM$r;XVr^RHlhc?{^F7a_0o0N zlc#dV#5FRXvqCGzyW&qYdrCPnC5G<7yZ`R{f>yftp?fJkw+O(0>0ea{r)iXkssnaieB%2LmVz^ zZIucu{{&!#Tc#)2i){3fHM;7a=9Jnx zQ~+lhPLprryCgSU-`oV+0{k8BK#!k7r`381y42@kh_ZG@lyGWt2GfKJDy@pZaV*b{X4~@%J$K3S#q0_5Lm0 zrnr8p`@jVb>Hl$-6h?>SanbRJ`j0=8)Af^yErfWD2Q!tP(B4p0R^pf1Gj%`Ak^gC~ zg4#eOo<~=xi^ZD#9X6Fi%>ZW1JXD1yE0@Xl>n6if0*=LbTkVjzSCLt_^$l4!!D&GA zw2JATCw%ynKv1_^;tkBGQ1jbf81UHdmAnw<+LVO+*xJRDW`r;zS9AN#QjwBvK-<9S z+A)DRvJl_9Az{L?cTFRGU14_%qfvsuYBnoG-_#@f?@K~+HAj*4U52p15=Bx1oxqOq zNX`ME{J`={s2gYma!~shP(gx(075RXdsQbztxlwVvh$ILY5)aay>us`WKT{Kbw&cH;|gG<28 z9I|mglD+@a*{J8>&pPBc>dT|(ZhLzoLgQKgbH_-&E7{LYT#m7dinhb<|0_X@V)&xr zP1XZ^IAn~}E0H)Oc$$1aq+*m+*NkI#1Tx&&D}tOWzfZl{=l_kJ*+JVgX^W%%JdEO8 z-4fIYKK@t2A0B?tjHrXO;olT=1q+zRz4)~qb#z6ch5Vspt~p-vuIbd&urYIsO{^O* z<6k~fP}5O2wlehUC(GMiQ{c`)6ThW1ohH0Bd*29o8ALd?utMJcITZb?1;p3>3zv3o=pVt@rQDaMEG zy40(6zs3c*DT(mRMXM9Pgl=w^`YwD?J1h*VL_7!e1>55FK*NXQ^TZp&CisjHPEC|+ zGY%5}(C*#4BVzZ{tkP+6xAe1{nUb`xZ57S9v#%N=*w(vz`Vf%TJ zBYjkh`kC25RiH=fW$up$EPbRFS9HooMdqEjfNl}NZQKwKfqu$jM9>K)_h?481SyCw za<@1Cp&ee~b2)Hxqi^`Ds6zcGKVGNuM{Udu7nSgjO$mqw2UhP9qe!3;Jg2}l<8+;vkFC2luALuC{rw#3 zOYf=B|4ZRq>%zABu;y_KtIx-<+|3+pvq#A&gJ^mHl#Osc50Az#Z6FlnFi?s9Fd9J| z#UR_!2PA#pc6oxN{ z>h3qe4m@$q;KRQ8`f)u38u}l^?6XGv1vB0$0?@f4U{va3n0tYpb>3k6(I)Ww+X&VGxb^=;8^0$n9$RQBKW5PgCuOLkFH&d$DRU^NmrU~u9X zM>1+`)+JZ!QIVlTwXH@4y-1X3phXrea-@v(8$e^WbH$+m{VFg%ETRPVNOzURC)0lC za3?)yQIdsxwEp8iG>Fy1i`h@&%{6zEuO#PsW5!BbQHFNowxx3-_)5X=od%$H!SF9D z^cAD$uY+ET^75USr$+`KUurN*61jscE4-D7TH6radnYl9q9!Uj9jp71BkFc(D!TbD z*q|w@%rnV6a?|B~gYyF7K(&i6=zQ&DAiVOtkIuT z46oBz{V8OyrS%VJKvt6YD`Lo1j*@(3<>T1 z-+fvP#Ymx}KS9r2kft^Fg0JfkSRg-g)Ew{TPzapf>PW4qgM_&xiP#;)BXV&KmP$7o zb?IimWCZn=5J`xeX_)F%y?pGQmP=*<9|d>N49sur+5TYsT%G5Rs*UPhlNM5SvJGRq zq<+ASA}06BYl8*HBB+lRE>Ax_OQp5Jd(aYTjLI9!th@+a_tsQQH<1z<{KBwymiFPQ zXHxfiG{*E1)AbP33LPVoygkZP@##xKkkf)oQc-u?+^uGCi~8&AeVqYEZ{rX8-zM#N z|2qIpu))?MH1RKDkLA=4jt~BxmbU=(RxTs!ze<=cKhjJtIQ>_$*o0(3=#fqpeZl#y z=MWDeNr4;kCK~6Y!8!6GZjlHcFl_t4mci;4gGUqlDA z(ztTq%~I-J3rO7VwS|~+2xgT0Py~3+5b3h|=ec0U$RM`-nr0fyYytt}irBMB*h;qk z46%mY?+fXqkXXGLwO$_?h(~Uh#hoZ^iBq=F;qwEax$Lby0g0RMd^(7U@E*rv+J8?D zQV{53*5v(2F$qn^!TkbB;Gul*)3r2zDbW`9_=$zGzFh7gBJ2;EygN4`};v z;IE1p%P%mx%t@DMeV0cE*A#Hi8>_X&?FNNGhT4^CtNCN|UI)#p*CuAiN#%ua&el!} z&a&C&FDLW)UDW9^-StFJ+Rr_(?&~Yk{3oXG%*7Oa>2=uNrpx{8B z(D#11`RLEkBFEKxuC*hOPJowJjne7b-r9s%V!p&*R=D0$C`afsB2oD~HWEty8#>~{ zxy_yDc=4nk?fH)5MpH)4&vUwl8Zq{8mhaWK-DCltkMASABepF!55+MKcq5Deoh8K7 z;sJysaQFgJF)2ujVx`UNNW__O#Gk`TZ@su~2vV`B|D(Z#zDmY0#G~auAbT3ZiV^(Y zuZIUN4kGeKgd*b&58PP+c=;>GtmkM0-ftiwzRTZ*kI%WsTU+nh)i|AhVeb&0_V~op z>l*Xlr7873s+p`SA&#q^`T_>8BUQ1_M!Ii2GDpigkG*`+uI(8?0e{>!X}*2s+=0|M z9p8ip@=M<3N^$x6JN)ali;vCsSRCZuN-&(-2OMOqs<4c=Aj-jOi>0%!PZiRSc^?Yw z)17`&FDO1cbYfWyJJj(&l((P@=G)#=U?oGZwhpf!cc!B^XF{L^m1{l-t`>-DNds>A z5WYTxe2_6bf{*9Wc+u&$Em%&(z=1kP_b1a?X+K~0A+8>fC{&OvJmGJwG&$RqUA?E%7OJ}a(VwUqk6dS| z3kOCQZ#-lANV{t?@dMEPj=2tl2LB!Ke+J}-)2omp&c+IwLpN4Njk zh~e!uQ*2F)x*htCQY}o%R%JG*Zjn)WS(DxD3i*o1bS0X+^~c4qmzD|A1~m=pY+`87vFDh{=G42`dSrn77>^Aw)aKi!#Yg}4S+ zm|L=HYcI<7PCO8O+SeuB*U1r}1qQEit9D_aEZe;H%)% zL3aNBfBpq3c`|f(5=uS#Dn=z@J|QYib6GB}=IE)+mNK&>L77Q0GG`!D6>#@4q;C`B 
ztl=1zrdwh&qv`6=0kK8J$lmG#cb}r22{|5Uth+0JM$hB0WK=oBq~=c% z{hVuynXj6dXT1+Ll~H@-fFeP+QKVcDa;+CtgSx9lU<>~Ij%qn%+|V}i@9O!$z0QlA zr@15R{^Ux=c~(9XHDrEcJQ;4HF&$Vhp{n8r^8mqWby#lOQvoB_Sh_+O#SngENVIQ65b7mLf+v(2N1pT zy}Gm%k+{?LAi{OC=c~u+YGpA^eXlXYta}bf6ur1uHD=QwlMHRoZLc32aPG9s$lmw+YKLSZBX-B7m5gwm;*TCE;G+A0WN?=8yIHRUEg2 zP7ON;-@;f1Ki1PCMI+RRFGnIXHR*<&%q*nM)sKl#0RJ8 zv1ET&g3CH~e)8S&lC;ToH~^N!{pi=udl0=;tuS9QXIrO~sbzH{TW{*gwT3{(J08MP zF3s*BWS!qDt5MBElPaM|o)6a@N0>U4liY;yxExQfq?&qP=tK7Uc{mADTXfBcxpUlU z2sXfI1$1Ajvl^3!%S|s1PLsnBM4HN4d`oFX5I)V<2y{~R5W$?_&3_g%PHYYR0yxwo)9qN*d zSq>hPSohidjFYj~40obiMTCwMKHTj%#G@dG^7g*el%go!@CjWQN%ibq_c^Nzcfti& zj|CE&x=Oo;jCKG;dl7r`a`Ko76f7`g6JHG~>)y`}XI)$Oz2L_lQV&+0u8o9E!5mlx z>eb(v_*dAr>)0lJgj-eLkIKR8;Hl+muu^~E^&FRNuf_ME^I}~Gi*!w)E^W3SES+ES zuvPuTpltYlB*y>+6CGQADn70aDJRki|)nbwMpIO9Yv;(hwzv|4f9Uik9alUmD z{&fG~$EQX#PvhSFJ+(i^e7N}R8#fDnCC0%J31-7S z9Z0--l_dHuN9abv$m`3qJPv^m9-i#<&%AmVyzUQ%z#Sv#qx zY08e~h^w%I-Fk@MUqV0idyfE}J#r>$?6bm{1d|qfc(=+|Zyl@KLsp&(KFx;qy8h69 z%(rE>KK`Wno=u(cQ1(y1zV8miDTzLZ;QZ3VZmFzT^_F?7&y%0qa^t_8hZ9s2H_q&f z=~3(ZOAJc;Ebmg2i&a5nV!K0QEGlkQg322TpUt6szTlEF7ShP$r!kleYB6#9v7x^I zEvaS$sf*3>a$!F&EedL7?E}2JB5ze8ywP1E3mGzFYf*xxcGGCS|Wr-*m^`O~qn<*|-DzVNH} z;4Wm!!Y+*Ki2r{4UJA%7U$5ReV4=1kT)e zB>@w^2blIWeJB7SIif8`465nged{`UC#TGbX;}T6!i23}0|)}nSwgnBK%r8E>e2L? zQ()_ZN8ty&NE%aPYefd64T>Y4#Izx{K4F=&Spozni47D@ZbAeN1iNYnoHa%fAo4t{ z)(l6i%(B8pAE}S}bu#RupWh9XoNp!QYKgr!=BN(a(m*!w;4xzc0#;Jz4i`^2j6Asa~^Z(&uz0 zzpi|`V{WOJ=|OpT@AY=nvkXnlpJf4sj-HPRJpHLcG8q=Px8$BVjmtOUTup^HM~R zcjR2Z0zNW8D2x>MQ&Jnx37w4D^B4u!p65x3x~Sa0q^8dvp^rFrome$NB>2LaQ2DP$ zO5twvgcMpxBrG85=*2Ji%~(&o(0ZM5EBb`0ec@Vs7{1oJ5#Mz77;gOl@#LXatw-}X zM-R171?7n>536ze+;c&{1jQQu`^dllZ=F!E#(P$t_RZ$!VJp^M%D%=C=3jTy7&Q8h z`6YZMDO%zEm$pN|XrjuTX9If6;Js@tKf3lAp0QcgCG6p3L^D>HXP|9Bmkd-3xUQE# z8DpP-E?_KU-5_)?e2Gr+h1+Rou@WDruEw0Z`ZT2XMaI zRYKE)sJ_857JwtqPSbr)bKdvvfkZuU>bnhApPg_>%Kv9s-7i#0V;gZF_!*#}&H{hd z=NDg6vq2wfsQphA;ymQm?>6#~9<;cV2mrjV?3`;lL*3ujfjLuRQtdGxI{53^%#c;XFr=|s* zHWD9_2)BhqaqjCpc)x91v(mTFqZVuuK?2n@a-P$wgEUk8xL+g-pimn}bL9Pf(%|aU z)IMxYaN4l-xp6(JWaj|qR8H%_pLP-?4h=)qZ8^J!B~^#TE}f}4it;-@JZ^`9 z9K+ftk}USp^`eYTt`Z(ry8?5`{>6<8CRdp=X#Woff zhjq&;8DD(9z0&g% z0`O%S>~IG%Qu&n9CnJ)(bb+n_cwmzn^_0jW%e+acJ3?SsFaEtVQ`E*{D0sPy*xP_#;$Wj*1>hz%Jr|9eA` z=8jCl%Jg>NXyC!fSF+}RBC+Aj;b$L5W-Kb&qoaowI0Ik1d~+_unF z+A6JJWf$Sfv?o}wZq|K^bLAtIlg}4^D3Jwz_j|^3R?(TJltTlY#XiyivKf_PZN|Gk z_*t!>QZ{jYa1~D;XCs}+Eb@0w+Vgw>>QG7ErQ{jp@{w!)u{EgoUiT;{GCle}^ph9f zc?reh+;Xj0t+xM8IIt1VoKZURgzmq<3NZNlDF1e%m%*p#)ARb~E)IEuHcg6meelYT ztS&Qh(hjO6-AeFMwYyctI3J%tt73OFW@Bju6t4a>jwz1qUnn6p6j`m0dnYs{fy zyvDIu5>b(9SSP8qM`Go?z7-RNaVA%R(%*90xf{A6ctmCuUCv1HKb4 z@CMVN>~%Rs?ht)+T@aLCDjg;})cw0BFVjCGVC;J30Pcj=KuZ&B0i3>7NR}TUNfTmi zo!F=f&GicNg+<$Gic`Y3&pw22GK9kDCJFXWZ=NiC2YM35ES}HMOU88LGCMGp;9nPF ziS2@4Cd_3g?2P1%R<2bJL#@9Kf4&t0zag(~?q$05OM62^UShOHhw`yAJ}#jTZ5n(; zhPuvt+EIOgxUEd6|_b13g+q0M{&nAC5u&mKOsz^s!!e5yK%P;!hmwveuQuN%mA? 
z`i74F`Ax^*zHqlg+?ce8ZI%`eds&Gy-Ly#lP=9pb5c8tcIWc89Q-YKqn=~2dWwA~b zI$oJT&FDJ|*AAiv!qF4=W$enq6E_dGqc5t?&;A!GngHvjtL<- zY6ocfyxWA=17X;5$P_A=)^gmNA9YWoGZkxnvIbCr9uuN@#*kd$)Gc#bdA%95^xlYF zDz^oDgqt{yTRn68$q?BXa=P;bj8S%Qm+qt=FMZt1kWZ}~L1IkoGXheqiYUg-s_H=* zgKCZHGrz>&Sel>7g=Ry-cK@y`=Gjmd4$v$cEE!eTA=ezGk4y0pq;s#drSPhv;aZx) zN}#3E+a!AVV7gYT5;P_MTNZUNm7hrOqiY-nM<1|!@RchZA-shq;(}sUF|ZTKcFB6I zQIx@^ai*{T`O$|EV+|LIz`f zSMTb$ZIF(7djoy91DNpP;nrMP;h2DX$H!D%IEa=Ley|}AS)n((8S{%8L}4WL@i$o& z^EMwcBR~g%QcGr4hbza+JvmpJOIzx-^npDLsrD>tAAE@%&a<`r(6CB-|0T;Nj3Uo8 zUs5ni50JD%FZK40Od=3E^ZLXgS{qVet?OweG>=_ZL-OQtjKTrh4Zp8VFxs5_j#V+Q zU;Og5>oob_Tc3Snn(_eubsF25{(|3xOH#r)%I(_RPvk3WxSa|7gQR59T?6zwV7R zR0O!@R9|{H-gS22TOqmLduq7NA*T`5txfu5sKF$!ZI+u7L$*$c9`HJ|mfveGRMPj9 zBY-P#-DteU@~zM>+P$&`Yu2mik+LMBbQIkdH#dc7)AD(9E6QIebGFx1zYUU$#Z>DB z2y*XR+D-DV&66ic;19&dxw{Aoev9a(#*RZbiQ}=_Tws;dk2$w`8kz(^FSvSG+RUzD zn#|Z-#m%Co$)rixiuk{(7<{{=5t`w6wGd=m!}|%JO{OWa0r;NqeP0IsWP!S6^d^t$ zNO%S#!;jmrB_+S%-_&Nvn5a0Zmi|XCfIT(le6uGH+q<%{G)WCDqBm82i<|c~2?m3| z)t+j64)oMGDaLwJ zzo((-d|w3Egj3}AdzL%)86`@j_eM$zz@I!aK83L^ed1lEAX)ZE@i(v+7bAy(G;eNk|h`e5~MW`K6oE*IHWS*5vQ@P7=n-=7W za*;wfL0E;b+J-UXVuukC;QdE;H-7#ICus5PPCUVjZ{(_0z={x+bM}rLqJtRRs}Y+4 z`cPaQd66cA*DF|_Ke5hg*GAUP9#n;lAqm1~er`sN$4`BU*!T()=-PTNC`NkrF0E9W zYrY0gLg^sYWi~V$Y}= 1160.81.1 + +## Use Plugsched + +### Install the plug-in + + ``` + # rpm -ivh https://github.com/koordinator-sh/koordinator/releases/download/v1.1.1/scheduler-bvt-noise-clean-$(uname -r).rpm + ``` + +If you update the kernel version, you can use the following command to install the new plug-in. + ``` + # rpm -ivh https://github.com/koordinator-sh/koordinator/releases/download/v1.1.1/scheduler-bvt-noise-clean-$(uname -r).rpm --oldpackage + ``` + +After installation, you can see the `cpu.bvt_warp_ns` in cpu cgroup directory and the usage of it is compatible with Group Identity. + +### Removing plug-in + +Removing the plug-in can use the `rpm -e` command and the `cpu.bvt_warp_ns` doesn't exist either. Please make sure that no tasks are still using `cpu.bvt_warp_ns` before uninstalling. + +## Use Koordinator CPU QoS feature + +Please refer to [User Manual](../user-manuals/cpu-qos.md). \ No newline at end of file diff --git a/versioned_docs/version-v1.4/best-practices/colocation-of-hadoop-yarn.md b/versioned_docs/version-v1.4/best-practices/colocation-of-hadoop-yarn.md new file mode 100644 index 000000000..41dfe5628 --- /dev/null +++ b/versioned_docs/version-v1.4/best-practices/colocation-of-hadoop-yarn.md @@ -0,0 +1,230 @@ +--- +sidebar_position: 4 +--- + +# Running Hadoop YARN with K8s by Koordinator + +## Introduction + +Koordinator has supported hybrid orchestration workloads on Kubernetes, so that batch jobs can use the requested but +unused resource as `koord-batch` priority and `BE` QoS class to improve the cluster utilization. However, there are +still lots of applications running beyond K8s such as Apache Haddop YARN. As a resource management platform in BigData +ecosystem, YARN has supported numbers of computing engines including MapReduce, Spark, Flink, Presto, etc. + +In order to extend the co-location scenario of Koordinator, now the community has provided Hadoop YARN extended suits +`Koordinator YARN Copilot` in BigData ecosystem, supporting running Hadoop YARN jobs by koord-batch resources with other +K8s pods. The `Koordinator YARN Copilot` has following characteristics: + +- Open-Source native: implement against open-sourced version of Hadoop YARN; so there is no hack inside YARN modules. 
+- Unified resource priority and QoS strategy: the suite targets the `koord-batch` priority of Koordinator and is also managed by the QoS strategies of koordlet.
+- Resource sharing on the node level: node resources of `koord-batch` priority can be requested by both YARN tasks and `Batch` pods.
+- Adaptive to multiple environments: the suite can run in any environment, including public clouds and IDCs.
+
+## Prerequisite
+
+- Kubernetes >= 1.18
+- Koordinator >= 1.4
+- Koordinator YARN Copilot >= 0.1
+- Hadoop YARN >= 3.2.1
+
+## Installation
+All charts can be simply installed with helm v3.5+, which is a simple command-line tool, and you can get it from [here](https://github.com/helm/helm/releases).
+
+![image](/img/hadoop-k8s.svg)
+
+### Install Koordinator
+Please make sure the Koordinator components are correctly installed in your cluster. For more information about installation and upgrade, please refer to [Installation](/docs/installation).
+```shell script
+# First, add the koordinator charts repository if you haven't done this.
+$ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/
+
+# [Optional]
+$ helm repo update
+
+# Install the latest version.
+$ helm install koordinator koordinator-sh/koordinator
+```
+
+### Install Hadoop YARN
+Hadoop YARN consists of the ResourceManager and NodeManagers. Currently, we recommend deploying the ResourceManager independently on hosts, while running the NodeManagers as pods.
+
+The Koordinator community provides a demo chart `hadoop-yarn` with the Hadoop YARN ResourceManager and NodeManager, optionally including HDFS components so that example jobs can be run easily. You can use the demo chart for a quick start of YARN co-location; otherwise, refer to the official [Installation](https://hadoop.apache.org/docs/stable/hadoop-yarn/hadoop-yarn-site/YARN.html) guide if you want to build your own YARN cluster.
+
+```shell script
+# First, add the koordinator charts repository if you haven't done this.
+$ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/
+
+# [Optional]
+$ helm repo update
+# Install the latest version.
+$ helm install hadoop-yarn koordinator-sh/hadoop-yarn
+
+# Check the running status of the hadoop yarn pods.
+kubectl get pod -n hadoop-yarn
+```
+
+Some key information should be known before you install YARN:
+- The ResourceManager must be accessible from K8s pods, no matter whether it is deployed in host mode or pod mode.
+- The NodeManager must be deployed in pod mode with the annotation `yarn.hadoop.apache.org/node-id=${nm-hostname}:8041` to identify the node ID in YARN.
+- The NodeManager must use CgroupsLCEResourcesHandler as the Linux container executor and specify its cgroup hierarchy under the K8s best-effort directory.
+- NodeManager pods request resources with `koord-batch` priority, so Koordinator must be pre-installed with co-location enabled.
+
+These features have already been configured in the Hadoop YARN chart in the Koordinator repo. If you are using a self-maintained YARN, please check the [Koordinator repo](https://github.com/koordinator-sh/charts/blob/main/charts/hadoop-yarn) for reference during installation.
+
+### Install Koordinator YARN Copilot
+Koordinator YARN Copilot consists of `yarn-operator` and `copilot-agent` (WIP).
+
+```shell script
+# First, add the koordinator charts repository if you haven't done this.
+$ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/
+
+# [Optional]
+$ helm repo update
+
+# Install the latest version.
+$ helm install koordinator-yarn-copilot koordinator-sh/koordinator-yarn-copilot
+```
+
+## Configuration
+1. Configuration of koord-manager
+
+After installing through the helm chart, the ConfigMap slo-controller-config will be created in the koordinator-system namespace. YARN tasks are managed under the best-effort cgroup and should be configured as a host-level application; see the related [issue](https://github.com/koordinator-sh/koordinator/issues/1727) about the management of YARN tasks under Koordinator.
+
+Create a configmap.yaml file based on the following ConfigMap content:
+```yaml
+apiVersion: v1
+data:
+  colocation-config: |
+    {
+      "enable": true
+    }
+  resource-threshold-config: |
+    {
+      "clusterStrategy": {
+        "enable": true
+      }
+    }
+  resource-qos-config: |
+    {
+      "clusterStrategy": {
+        "lsrClass": {
+          "cpuQOS": {
+            "enable": true
+          }
+        },
+        "lsClass": {
+          "cpuQOS": {
+            "enable": true
+          }
+        },
+        "beClass": {
+          "cpuQOS": {
+            "enable": true
+          }
+        }
+      }
+    }
+  host-application-config: |
+    {
+      "applications": [
+        {
+          "name": "yarn-task",
+          "priority": "koord-batch",
+          "qos": "BE",
+          "cgroupPath": {
+            "base": "KubepodsBesteffort",
+            "relativePath": "hadoop-yarn/"
+          }
+        }
+      ]
+    }
+kind: ConfigMap
+metadata:
+  name: slo-controller-config
+  namespace: koordinator-system
+```
+
+To avoid changing other settings in the ConfigMap, we recommend that you run the kubectl patch command to update the ConfigMap.
+
+```bash
+$ kubectl patch cm -n koordinator-system slo-controller-config --patch "$(cat configmap.yaml)"
+```
+
+2. Configuration of koord-yarn-copilot
+`koord-yarn-copilot` communicates with the YARN ResourceManager during resource syncing, and the following ConfigMap defines the YARN-related configuration.
+```yaml
+apiVersion: v1
+data:
+  core-site.xml: |
+    <configuration>
+    </configuration>
+  yarn-site.xml: |
+    <configuration>
+      <property>
+        <name>yarn.resourcemanager.admin.address</name>
+        <value>resource-manager.hadoop-yarn:8033</value>
+      </property>
+      <property>
+        <name>yarn.resourcemanager.address</name>
+        <value>resource-manager.hadoop-yarn:8032</value>
+      </property>
+    </configuration>
+kind: ConfigMap
+metadata:
+  name: yarn-config
+  namespace: koordinator-system
+```
+You can change the default address and port at `yarnConfiguration.resourceManager` in the chart values.
+
+### (Optional) Advanced Settings
+You can check the helm charts [hadoop-yarn](https://github.com/koordinator-sh/charts/blob/main/charts/hadoop-yarn) and [koordinator-yarn-copilot](https://github.com/koordinator-sh/charts/blob/main/charts/koordinator-yarn-copilot) for more advanced settings.
+
+## Check YARN Available Resources
+1. Check the allocatable batch resources of Koordinator on the nodes.
+```bash
+$ kubectl get node -o yaml | grep batch-cpu
+    kubernetes.io/batch-cpu: "60646"
+    kubernetes.io/batch-cpu: "60486"

+$ kubectl get node -o yaml | grep batch-memory
+    kubernetes.io/batch-memory: "245976973438"
+    kubernetes.io/batch-memory: "243254790644"
+```
+
+2. Check the node allocatable resources in YARN.
+Visit the YARN ResourceManager web UI address `${hadoop-yarn-rm-addr}:8088/cluster/nodes` in a browser to get the YARN NM status and allocatable resources.
+
+If you are using the hadoop-yarn demo chart in the Koordinator repo, please execute the following command to make the RM accessible locally.
+```shell script
+$ kubectl port-forward -n hadoop-yarn service/resource-manager 8088:8088
+```
+Then open the UI in your browser: `http://localhost:8088/cluster/nodes`
+
+The `VCores Avail` and `Mem Avail` will be exactly the same as the batch resources of the K8s nodes.
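+
+If you prefer the command line to the web UI, the ResourceManager REST API exposes the same information. The sketch below is optional; it assumes the `kubectl port-forward` command above is still running and that `jq` is installed locally, and the exact JSON field names may differ slightly between Hadoop versions, so verify them against your cluster.
+
+```bash
+# List YARN NodeManagers and their available resources through the RM REST API.
+# Requires the port-forward above (RM reachable on localhost:8088) and jq.
+$ curl -s http://localhost:8088/ws/v1/cluster/nodes | jq '.nodes.node[] | {id, availableVirtualCores, availMemoryMB}'
+```
+
+The reported `availableVirtualCores` and `availMemoryMB` should line up with the `kubernetes.io/batch-cpu` and `kubernetes.io/batch-memory` values shown above (keep in mind that vcores are whole cores, while batch-cpu is reported in milli-cores).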
+
+## Submit YARN Jobs
+Spark, Flink and other computing engines have supported submitting jobs to YARN since they were first released; check the official manuals such as [Spark](https://spark.apache.org/docs/latest/running-on-yarn.html) and [Flink](https://nightlies.apache.org/flink/flink-docs-master/docs/deployment/resource-providers/yarn/) before you start.
+
+It is worth noting that the hadoop-yarn demo chart in the Koordinator repo already integrates a Spark client. You can execute the following command to submit an example job and get its running status through the web UI of the ResourceManager.
+```shell script
+$ kubectl exec -n hadoop-yarn -it ${yarn-rm-pod-name} yarn-rm -- /opt/spark/bin/spark-submit --master yarn --deploy-mode cluster --class org.apache.spark.examples.SparkPi /opt/spark/examples/jars/spark-examples_2.12-3.3.3.jar 1000
+```
diff --git a/versioned_docs/version-v1.4/best-practices/colocation-of-spark-jobs.md b/versioned_docs/version-v1.4/best-practices/colocation-of-spark-jobs.md
new file mode 100644
index 000000000..ee27d1389
--- /dev/null
+++ b/versioned_docs/version-v1.4/best-practices/colocation-of-spark-jobs.md
@@ -0,0 +1,101 @@
+---
+sidebar_position: 1
+---
+
+# Colocation of Spark Jobs
+Apache Spark is an analysis engine for large-scale data processing, which is widely used in Big Data, SQL Analysis and Machine Learning scenarios. This tutorial provides a quick practice guide for running Spark jobs in colocation mode with other latency-sensitive applications by Koordinator, which helps improve cluster resource utilization. For more details about how to use, compose, and work with Koordinator colocation, please refer to the [Introduction](../).
+
+## Requirements
+### Koordinator Components
+Before submitting Spark jobs in colocation mode, you need to ensure that all Koordinator components have been successfully installed. Please follow the steps in the [Installation](../installation) guide.
+
+### Install Kubernetes Operator for Apache Spark
+To simplify running Spark jobs in the cluster, we use the Kubernetes Operator for Apache Spark in this practice, which uses a Kubernetes custom resource for managing Spark applications.
+
+With the help of the Helm [chart](https://github.com/koordinator-sh/koordinator/tree/main/examples/spark-operator-chart), the Kubernetes Operator for Apache Spark can be easily installed using the command below.
+```
+$ helm install koord-spark-operator ./spark-operator-chart/ --namespace spark-operator
+```
+
+Installing the chart will create the namespace `spark-operator` if it doesn't exist; besides, helm will create a spark-operator Deployment and set up an RBAC role for it. After the installation, you should see the operator running successfully by checking the status of the helm release.
+```
+$ helm status --namespace spark-operator koord-spark-operator
+```
+
+## Run Spark Applications with Koordinator
+Since the Spark driver pod needs a Kubernetes service account to manage executor pods, the service account must be authorized with appropriate permissions. Run the following command to create the namespace `spark-demo` and the service account `spark` before submitting jobs.
+```
+$ kubectl apply -f examples/spark-jobs/service-account.yaml
+```
+
+Next, run the following command to create a Colocation Profile so that all pods subsequently submitted in namespace `spark-demo` will run in colocation mode. See this [tutorial](../user-manuals/colocation-profile) to learn more about Colocation Profile.
+```
+$ kubectl apply -f examples/spark-jobs/cluster-colocation-profile.yaml
+```
+
+Submit a Spark TC example job to namespace `spark-demo` with the command:
+```
+$ kubectl apply -f examples/spark-jobs/spark-tc-complex.yaml
+```
+
+Then, check the status of the Spark application by running the following command.
+```
+$ kubectl get sparkapplication -n spark-demo spark-tc-complex
+```
+
+This will show content similar to the following:
+```
+NAME               STATUS    ATTEMPTS   START                  FINISH       AGE
+spark-tc-complex   RUNNING   1          2022-03-30T09:11:22Z                14s
+```
+Now, all pods submitted to namespace `spark-demo` will be switched to colocation mode. Check the spark-driver pod below as an example. We can see that protocols like `koordinator.sh/qosClass: BE` and `kubernetes.io/batch-cpu` are successfully injected into the pod by the Colocation Profile.
+```
+apiVersion: v1
+kind: Pod
+metadata:
+  labels:
+    koordinator.sh/qosClass: BE
+    spark-role: driver
+  ...
+spec:
+  containers:
+  - args:
+    - driver
+    - --properties-file
+    - /opt/spark/conf/spark.properties
+    - --class
+    - org.apache.spark.examples.SparkTC
+    - local:///opt/spark/examples/jars/spark-examples_2.12-3.2.1-tc1.2.jar
+    resources:
+      limits:
+        kubernetes.io/batch-cpu: "1000"
+        kubernetes.io/batch-memory: 3456Mi
+      requests:
+        kubernetes.io/batch-cpu: "1000"
+        kubernetes.io/batch-memory: 3456Mi
+  ...
+```
+
+## Evaluation
+With the help of Koordinator, when pods' resource usage is idle, resources already requested can be reallocated to other colocated pods through the overcommitment model, which can significantly improve the resource utilization of the cluster.
+
+In our experiment environment, before the Spark job is submitted, we can see that the cluster's allocatable resources are nearly used up while the actual resource usage stays at a low level.
+```
+$ kubectl describe node
+  Allocated resources:
+    Resource       Requests
+    cpu            7620m (95.25%)

+$ kubectl top node
+    NAME                       CPU(cores)   CPU%
+    cn-hangzhou.your-node-1    1190m        14.8%
+    cn-hangzhou.your-node-2    1620m        20.25%
+```
+
+After submitting the Spark job in colocation mode, those unused resources will be reallocated to Spark pods through `batch priority`, so that the cluster reaches a higher utilization level.
+```
+$ kubectl top node
+NAME                       CPU(cores)   CPU%
+cn-hangzhou.your-node-1    4077m        52%
+cn-hangzhou.your-node-2    3830m        49%
+```
\ No newline at end of file
diff --git a/versioned_docs/version-v1.4/best-practices/fine-grained-cpu-orchestration.md b/versioned_docs/version-v1.4/best-practices/fine-grained-cpu-orchestration.md
new file mode 100644
index 000000000..92851eb8c
--- /dev/null
+++ b/versioned_docs/version-v1.4/best-practices/fine-grained-cpu-orchestration.md
@@ -0,0 +1,259 @@
+# Coordinated sharing of CPU resources in Colocation Scenarios - Fine-grained CPU Orchestration
+
+## Introduction
+
+In a cloud-native environment, users often deploy different types of workloads in the same cluster, leveraging the different peak characteristics of different services to achieve time-sharing multiplexing of resources and avoid resource waste. However, colocation of different types of workloads often leads to resource competition and mutual interference. The most typical scenario is the colocation of online and offline workloads: when more computing resources are occupied by offline workloads, the response time of online workloads will be affected; when more computing resources are occupied by online workloads for a long time, the task completion time of offline workloads cannot be guaranteed. This phenomenon is known as the Noisy Neighbor problem.
+
+Depending on the degree of colocation and the resource types, there are many different ways to solve this problem. Quota management can limit the resource usage of workloads at the cluster level, and Koordinator provides multi-level elastic quota management capabilities in this regard. At the single-node level, CPU, memory, disk IO, and network resources may be shared by different workloads. Koordinator already provides resource isolation and guarantee capabilities for CPU and memory, while the related capabilities for disk IO and network resources are under construction.
+
+This article mainly introduces how Koordinator helps workloads (online with online, online with offline) share CPU resources collaboratively when different types of workloads are colocated on the same node.
+
+## Problem Description
+
+The essence of the CPU resource Noisy Neighbor problem is that different workloads share CPU resources without coordination.
+1. The default resource model of Kubernetes uses cgroup (cfs quota) to limit the access of different workloads to CPU resources in terms of CPU time usage. In this case, a workload may be switched between CPU cores by the operating system scheduler. Since different CPU cores have different memory access times to different physical locations, switching CPU cores results in longer memory access times and thus affects workload performance.
+2. In the NUMA architecture, SMT threads (logical cores) share the execution units and L2 cache of a physical core. When there are multiple workloads on the same physical core, resource contention happens between them, resulting in performance degradation.
+
+Kubernetes provides the topology manager and the CPU manager at the node level to solve the above problems. However, these features only attempt to take effect after the Pod has been scheduled onto the machine. This may lead to the situation where Pods are scheduled to nodes whose CPU resources are sufficient but whose topology does not meet the requirements.
+
+## Solutions
+
+### Application-Oriented CPU Orchestration QoS Semantics
+
+In response to the above problems and deficiencies, Koordinator designed application-oriented QoS semantics and a CPU orchestration protocol, as shown in the figure below.
+
+![img](/img/qos-cpu-orchestration.png)
+
+LS (Latency Sensitive) is applied to typical microservice workloads, and Koordinator isolates it from other latency-sensitive workloads to ensure its performance. LSR (Latency Sensitive Reserved) is similar to Kubernetes' Guaranteed; on the basis of LS, it adds the semantics that the application requires reserved, bound cores. LSE (Latency Sensitive Exclusive) is common in applications that are particularly sensitive to CPU, such as middleware; in addition to satisfying semantics similar to LSR's core-binding requirement, Koordinator also ensures that the allocated CPUs are not shared with any other workload.
+
+Also, to improve resource utilization, BE workloads can share CPUs with LSR and LS. To ensure that latency-sensitive applications sharing CPUs with BE are not disturbed by it, Koordinator provides strategies such as interference detection and BE suppression. That is not the focus of this article; readers can refer to follow-up articles.
+
+### Rich CPU scheduling strategies
+
+For LSE applications, when the machine has a hyper-threaded architecture, only the logical cores can be guaranteed to be exclusive to the workload. In this way, when there are other workloads on the same physical core, application performance will still be disturbed.
+To this end, Koordinator supports users to configure rich CPU scheduling policies on pod annotation to improve performance. + +CPU orchestration policies are divided into CPU-binding policies and CPU-exclusive policies. The CPU binding strategy determines the distribution of logical cores assigned to the application among physical cores, which can be spread or stacked among physical cores. Stacking (FullPCPU) refers to allocating complete physical cores to applications, which can effectively alleviate the Noisy Neighbor problem. SpreadByPCPU is mainly used in some delay-sensitive applications with different peak and valley characteristics, allowing the application to fully use the CPU at a specific time. The CPU exclusive policy determines the exclusive level of logical cores assigned to the application, and it can try to avoid physical cores or NUMANodes that have been applied for with the exclusive policy. + +### Enhanced CPU Scheduling Capabilities + +Koordinator supports the configuration of NUMA allocation strategies to determine how to select satisfactory NUMA nodes during scheduling. MostAllocated indicates allocation from the NUMA node with the least available resources, which can reduce fragmentation as much as possible and leave more allocation space for subsequent loads. However, this approach may cause the performance of parallel code that relies on Barriers to suffer. DistributeEvenly means that evenly distributing CPUs on NUMA nodes can improve the performance of the above parallel code. LeastAllocated indicates allocation from the NUMA node with the most available resources. + +In addition, Koordinator's CPU allocation logic is completed in the central scheduler. In this way, there will be a global perspective, avoiding the dilemma of single-node solution, where CPU resources may be sufficient but topology requirements are not met. + +## Best Practices +As can be seen from the above, Koordinator's fine-grained CPU orchestration capability can significantly improve the performance of CPU-sensitive workloads in multi-application colocation scenarios. In order to allow readers to use Koordinator’s fine-grained CPU scheduling capabilities more clearly and intuitively, this article deploys online applications to clusters in different ways, and observes the latency of services in stress testing to judge the effect of CPU scheduling capabilities. + +In this article, multiple online applications will be deployed on the same machine and pressure tested for 10 minutes to fully simulate the CPU core switching scenarios that may occur in production practice. For the colocation of online and offline applications, Koordinator provides strategies such as interference detection and BE suppression. The focus of this article is not here, and readers can pay attention to the practice in subsequent articles. 
+
+|Group Number|Deployment Mode|Description|Scenarios|
+|-|-|-|-|
+|A|10 online applications are deployed on the node, each applying for 4 CPUs, all using Kubernetes Guaranteed QoS|Koordinator does not provide fine-grained CPU orchestration capabilities for the applications|Due to CPU core switching, applications share logical cores and application performance is affected; this mode is not recommended|
+|B|10 online applications are deployed on the node, each applying for 4 CPUs, all using LSE QoS with the physical-core bin-packing CPU bind policy (FullPCPUs)|Koordinator provides CPU core binding for LSE Pods, and online applications do not share physical cores|Particularly latency-sensitive online scenarios in which applications cannot accept CPU sharing at the physical core level|
+|C|10 online applications are deployed on the node, each applying for 4 CPUs, all using LSR QoS with the physical-core spread CPU bind policy (SpreadByPCPUs) and physical-core-level CPU exclusivity|Koordinator provides CPU core binding for LSR Pods, and the logical cores of online applications can use more physical core capacity|Often used to share physical cores with offline Pods and implement time-sharing multiplexing at the physical core level. This article does not focus on the colocation of online and offline applications, so it only tests over-allocation among online applications|
+
+This experiment uses the following performance indicators to evaluate the performance of the Nginx application under different deployment modes:
+
+- RT (Response Time) quantile value: RT is a performance indicator that online applications usually focus on; the lower the RT, the better the online service performance. The RT indicator is obtained from the information printed by wrk after the stress test. In the experiment, it reflects the time it takes for the Nginx application to respond to a wrk request. For example, RT-p50 indicates the maximum time (median) it takes for Nginx to respond to the fastest 50% of wrk requests, and RT-p90 indicates the maximum time it takes for Nginx to respond to the fastest 90% of wrk requests.
+- RPS (Requests Per Second): RPS is the number of requests served by an online application per second. The more RPS a service bears, the better the performance of the online service.
+
+The experimental results are as follows:
+
+|Performance Indicators/Deployment Mode|A (colocation of two online applications, Guaranteed)|B (colocation of two online applications, LSE, FullPCPUs)|C (colocation of two online applications, LSR, SpreadByPCPUs, PCPULevel)|
+|-|-|-|-|
+|RPS|114778.29|114648.19|115268.50|
+|RT-avg (ms)|3.46 ms|3.33 ms|3.25 ms|
+|RT-p90 (ms)|5.27 ms|5.11 ms|5.06 ms|
+|RT-p99 (ms)|15.22 ms|12.61 ms|12.14 ms|
+
+- Comparing B and A, it can be seen that after adopting LSE QoS to bind cores, the P99 of the service response time is significantly reduced and the long-tail phenomenon is well alleviated.
+- Comparing C and B, it can be seen that after using LSR QoS to bind cores and allowing logical cores to use more physical core resources, more requests can be served with a better service response time.
+
+In summary, in the scenario where online services are colocated on the same machine, using Koordinator's fine-grained CPU orchestration can effectively suppress the Noisy Neighbor problem and reduce the performance degradation caused by CPU core switching.
+
+### Environment
+
+First, prepare a Kubernetes cluster and install Koordinator.
This article chooses two nodes of a Kubernetes cluster to do the experiment, one of the nodes is used as a test machine, which will run the Nginx online server; the other node is used as a pressure test machine, which will run the client's wrk, request the Nginx online server, and make pressure test requests . + +### Online application deployment + +1. Inject fine-grained CPU orchestration protocols into applications using ColocationProfile + + Group B fine-grained CPU orchestration protocol + + ```yaml + apiVersion: config.koordinator.sh/v1alpha1 + kind: ClusterColocationProfile + metadata: + name: colocation-profile-example + spec: + selector: + matchLabels: + app: nginx + # 采用 LSE QoS + qosClass: LSE + annotations: + # 采用物理核间堆叠 + scheduling.koordinator.sh/resource-spec: '{"preferredCPUBindPolicy":"FullPCPUs"}' + priorityClassName: koord-prod + ``` + + Group C fine-grained CPU orchestration protocol + + ```yaml + apiVersion: config.koordinator.sh/v1alpha1 + kind: ClusterColocationProfile + metadata: + name: colocation-profile-example + spec: + selector: + matchLabels: + app: nginx + # 采用 LSR QoS + qosClass: LSR + annotations: + # 采用物理核间打散且独占物理核 + scheduling.koordinator.sh/resource-spec: '{"preferredCPUBindPolicy":"SpreadByPCPUs", "preferredCPUExclusivePolicy":"PCPULevel"}' + priorityClassName: koord-prod + ``` + +2. This article uses Nginx server as Online Service , Pod YAML is as follows: + + ```yaml + --- + # nginx应用配置 + apiVersion: v1 + data: + config: |- + user nginx; + worker_processes 4; # Nginx的Worker个数,影响Nginx Server的并发。 + + events { + worker_connections 1024; # 默认值为1024。 + } + + http { + server { + listen 8000; + + gzip off; + gzip_min_length 32; + gzip_http_version 1.0; + gzip_comp_level 3; + gzip_types *; + } + } + + #daemon off; + kind: ConfigMap + metadata: + name: nginx-conf-0 + --- + # Nginx实例,作为在线类型服务应用。 + apiVersion: v1 + kind: Pod + metadata: + labels: + app: nginx + name: nginx-0 + namespace: default + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - "${node_name}" + schedulerName: koord-scheduler + priorityClassName: koord-prod + containers: + - image: 'koordinatorsh/nginx:v1.18-koord-exmaple' + imagePullPolicy: IfNotPresent + name: nginx + ports: + - containerPort: 8000 + hostPort: 8000 # 压测请求访问的端口。 + protocol: TCP + resources: + limits: + cpu: '4' + memory: 8Gi + requests: + cpu: '4' + memory: 8Gi + volumeMounts: + - mountPath: /apps/nginx/conf + name: config + hostNetwork: true + restartPolicy: Never + volumes: + - configMap: + items: + - key: config + path: nginx.conf + name: nginx-conf-0 + name: config + ``` + +3. Execute the following command to deploy the Nginx application. + + ```bash + kubectl apply -f nginx.yaml + ``` + +4. Execute the following command to view the Pod status of the Nginx application. + + ```bash + kubectl get pod -l app=nginx -o wide + ``` + + You can see output similar to the following, indicating that the Nginx application has been running normally on the test machine. + + ``` + NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES + nginx-0 1/1 Running 0 2m46s 10.0.0.246 test-machine-name + + ``` + +### Load Test + +1. On the testing machine, execute the following command to deploy the stress testing tool wrk. + + ```bash + wget -O wrk-4.2.0.tar.gz https://github.com/wg/wrk/archive/refs/tags/4.2.0.tar.gz && tar -xvf wrk-4.2.0.tar.gz + cd wrk-4.2.0 && make && chmod +x ./wrk + ``` + +2. 
On the testing machine, execute the following command to start the stress test with wrk against the Nginx service.
+
+   ```bash
+   # Fill in node_ip with the IP address of the test machine, which wrk uses to send stress-test requests; 8000 is the port that Nginx exposes on the test machine.
+   taskset -c 32-45 ./wrk -t120 -c400 -d600s --latency http://${node_ip}:8000/
+   ```
+
+3. After wrk finishes running, obtain the stress test results from wrk. The output format of wrk is as follows. Repeat the test several times to obtain relatively stable results.
+
+   ```
+   Running 10m test @ http://192.168.0.186:8000/
+     120 threads and 400 connections
+     Thread Stats   Avg      Stdev     Max   +/- Stdev
+       Latency     3.29ms    2.49ms 352.52ms   91.07%
+       Req/Sec     0.96k   321.04     3.28k    62.00%
+     Latency Distribution
+        50%    2.60ms
+        75%    3.94ms
+        90%    5.55ms
+        99%   12.40ms
+   68800242 requests in 10.00m, 54.46GB read
+   Requests/sec: 114648.19
+   Transfer/sec:     92.93MB
+   ```
+
+## Conclusion
+
+In a Kubernetes cluster, there may be competition for resources such as CPU and memory among different business loads, which affects the performance and stability of the business. In the face of the Noisy Neighbor phenomenon, users can use Koordinator to configure more refined CPU scheduling policies for applications, so that different applications can share CPU resources collaboratively. We have shown through experiments that Koordinator's fine-grained CPU scheduling capability can effectively suppress the competition for CPU resources and improve application performance.
\ No newline at end of file
diff --git a/versioned_docs/version-v1.4/designs/descheduler-framework.md b/versioned_docs/version-v1.4/designs/descheduler-framework.md
new file mode 100644
index 000000000..e054a557a
--- /dev/null
+++ b/versioned_docs/version-v1.4/designs/descheduler-framework.md
@@ -0,0 +1,84 @@
+# Descheduler Framework
+
+## Summary
+
+This proposal is based on the K8s community's [descheduler](https://github.com/kubernetes-sigs/descheduler) and designs and implements the descheduler framework required by Koordinator.
+
+## Motivation
+
+The existing [descheduler](https://github.com/kubernetes-sigs/descheduler) in the community can solve some problems, but we think that there are still many aspects of the descheduler that can be improved. For example, it only supports a periodic execution mode and does not support an event-triggered mode; unlike kube-scheduler, it is not possible to extend and configure custom descheduling strategies without invading the existing descheduler code; and it does not support implementing a custom evictor.
+
+We also noticed that the K8s descheduler community found these problems as well and proposed corresponding solutions such as [#753 Descheduler framework Proposal](https://github.com/kubernetes-sigs/descheduler/issues/753) and [PoC #781](https://github.com/kubernetes-sigs/descheduler/pull/781). The K8s descheduler community tries to implement a descheduler framework similar to the k8s scheduling framework. This coincides with our thinking.
+
+On the whole, these solutions solve most of our problems, but we also noticed that the related implementations have not been merged into the main branch. We reviewed these implementations and discussions, and we believe this is the right direction. Considering that Koordinator has clear milestones for descheduler-related features, we will implement Koordinator's own descheduler independently of the upstream community.
We try to use some of the designs in the [#753 PR](https://github.com/kubernetes-sigs/descheduler/issues/753) proposed by the community, and we will follow Koordinator's compatibility principle with K8s to maintain compatibility with the upstream community descheduler when implementing. Such an independent implementation can also drive the evolution of the upstream community's work on the descheduler framework. And when the upstream community has new changes or switches to an architecture that Koordinator deems appropriate, Koordinator will follow up promptly and actively.
+
+### Goals
+
+1. Implement Koordinator Descheduler following part of the design in [#753](https://github.com/kubernetes-sigs/descheduler/issues/753) proposed by the community
+
+### Non-Goals/Future Work
+
+1. Break any existing use cases of the Descheduler.
+
+## Proposal
+
+### Implementation Details/Notes/Constraints
+
+#### Descheduler profile
+
+The current descheduler configuration is too simple to support disabling or enabling plugins or supporting custom plugin configurations. [PR #587](https://github.com/kubernetes-sigs/descheduler/pull/587) introduces descheduler profiles with the v1alpha2 API version. We will use this proposal as Koordinator Descheduler's configuration API.
+
+- The descheduler profile API supports users specifying which extension points are enabled/disabled, alongside specifying plugin configuration, including the ability to configure multiple descheduling profiles.
+- The descheduling framework configuration can be converted into an internal representation.
+- To reduce the need to specify a value for every possible configuration, defaulting serves as recommended/opinionated settings for the plugins.
+
+#### Abstract PodEvictor interface
+
+Currently, the descheduler has split `Pod Evictor` and `Evictor Filter`. Users can inject an `Evictor Filter` on demand; a plugin calls the `Evictor Filter` when selecting abnormal Pods to pick the Pods that meet the requirements, and then calls `Pod Evictor` to initiate eviction. At present, `Pod Evictor` has not been abstracted as an interface. We adopt the solution in [PoC #781](https://github.com/kubernetes-sigs/descheduler/pull/781) to abstract an `Evictor` interface, and refer to [PR #885](https://github.com/kubernetes-sigs/descheduler/pull/885) to add an `EvictOptions` parameter. We can implement a custom Evictor based on [PodMigrationJob](https://github.com/koordinator-sh/koordinator/blob/main/docs/proposals/scheduling/20220701-pod-migration-job.md).
+
+The `Evictor` interface is defined as follows:
+
+```go
+type EvictOptions struct {
+    // PluginName represents the initiator of the eviction operation
+    PluginName string
+    // Reason allows for passing details about the specific eviction for logging.
+    Reason string
+    // DeleteOptions holds the arguments used to delete
+    DeleteOptions *metav1.DeleteOptions
+}
+
+// Plugin is the parent type for all the descheduling framework plugins.
+type Plugin interface {
+    Name() string
+}
+
+type Evictor interface {
+    Plugin
+    // Filter checks if a pod can be evicted
+    Filter(pod *corev1.Pod) bool
+    // Evict evicts a pod (no pre-check performed)
+    Evict(ctx context.Context, pod *corev1.Pod, evictOptions EvictOptions) bool
+}
+```
+
+#### Plug-in descheduler strategy
+
+The current descheduler has some strategies. In [PoC #781](https://github.com/kubernetes-sigs/descheduler/pull/781), they are converted into `Plugin`s and executed periodically.
In this `periodic execution mode`, it is appropriate to abstract the policy for Pod and Node dimensions as `DeschedulePlugin` or `BalancePlugin`. The load hotspot descheduling capability that we will implement later can also implement the BalancePlugin interface. + +The `DeschedulePlugin` and `BalancePlugin` interfaces defined as follows: + +```go +type DeschedulePlugin interface { + Plugin + Deschedule(ctx context.Context, nodes []*corev1.Node) *Status +} + +type BalancePlugin interface { + Plugin + Balance(ctx context.Context, nodes []*corev1.Node) *Status +} +``` + +We also need to support the `event-triggered mode`, which means that descheduling is performed in the form of a Controller. +In some scenarios, CRD-oriented descheduling needs to be implemented. For example, different descheduling configurations are provided according to the workload. When some abnormality is detected in the workload, descheduling will be triggered. We can think of Controller as a special form of Plugin. When the descheduler is initialized, an instance is constructed through the plugin factory function like a normal Plugin, and then a similar Run method is called to start execution. \ No newline at end of file diff --git a/versioned_docs/version-v1.4/designs/enhanced-scheduler-extension.md b/versioned_docs/version-v1.4/designs/enhanced-scheduler-extension.md new file mode 100644 index 000000000..8c61c719d --- /dev/null +++ b/versioned_docs/version-v1.4/designs/enhanced-scheduler-extension.md @@ -0,0 +1,232 @@ +# Enhanced Scheduler Extension + +## Summary + +This proposal describes how to extend the kubernetes scheduling framework without modify upstream codes to support the scheduling features that Koordinator needs to develop. + +## Motivation + +Although Kubernetes Scheduler provides the scheduling framework to help developer to extend scheduling features. However, it cannot support the features that Koordinator needs to develop, such as Reservation, problem diagnosis and analysis, etc. + +### Goals + +1. Provides scheduling extension point hook mechanism +1. Provides scheduling plugins expose state mechanism to help diagnose analysis problems + +### Non-Goals/Future Work + + +## Proposal + +### User stories + +#### Story 1 + +Koordiantor supports users to use `Reservation` CRD to reserve resources. We expect Reservation CRD objects to be scheduled like Pods. In this way, the native scheduling capabilities of Kubernetes and other extended scheduling capabilities can be reused. This requires a mechanism to disguise the Reservation CRD object as a Pod, and to extend some scheduling framework extension points to support updating the Reservation Status. + +#### Story 2 + +Koordinator provides some scheduling plugins, such as Fine-grained CPU Scheduling, Device Share Scheduling, Coscheduling, ElasticQuota, etc. These plugins are brand new, and the supported scenarios are relatively rich, and the internal logic and state of the plugins are also a bit complicated. When we may encounter some problems in the production environment and need to be diagnosed and analyzed, we need to confirm the cause of the problem based on the internal status of the plugin. But currently the kubernetes scheduling framework does not provide a mechanism to export the internal state of the plugin. + +#### Story 3 + +The scheduler provides many plugins, and most plugins implement Scoring Extension Point. How to configure the weights of these plugins needs to be decided in combination with specific problems. 
When the optimal node is selected according to the scoring results, the results may not meet expectations. At this point we need to be able to trace or debug these scoring results in some way. But there is currently no good way. + +### Design Details + +#### Enhancement Kubernetes Scheduling Framework principles + +At present, the kube-scheduler provided by Kubernetes can be divided into several parts. The outermost layer is `k8s.io/kubernetes/cmd/kube-scheduler`, which is the entrance of kube-scheduler; `k8s.io/kubernetes/pkg/scheduler` is responsible for integrating the framework And execute scheduling workflow, including initializing framework and plugins, scheduling Pod, etc. The core module is `k8s.io/kubernetes/pkg/scheduler/framwork`, which is the **Kubernetes Scheduling Framework**. + +Each layer provides some interfaces or methods to support developers to extend some capabilities, and the evolution speed of each layer is also different. Generally speaking, the evolution speed of the more core modules should be slower, and the evolution of core modules tends to extend rather than modify the existing interface or extension mechanism, otherwise it will bring very large cost and reliability to external dependencies. question. But each layer does not support implementing some features for some reason. But as far as the problems Koordinator is currently experiencing, there are still some workarounds. However, some principles need to be followed to reduce future conflicts with the evolution of the upstream Kubernetes community. + +1. DO NOT modify the Kubernetes Scheduling Framework. The scheduling framework is the core module of kube-scheduler and is still evolving. In order to avoid conflict with the upstream community between Koordinator's enhanced capabilities. +1. DO NOT modify the `k8s.io/kubernetes/pkg/scheduler` but can implements supported interfaces or high-order functions, such as `ScheduleAlgorithm`, `NextPod`, `Error` and `Profiles`. The `Profiles` contains an instance of the Framework interface corresponding to each KubeSchedulerProfile. We can implement the Framework and replace the instances in Profiles to get the opportunity to participate in the scheduling process to do something. +1. Extend `k8s.io/kubernetes/cmd/kube-scheduler` as simply as possible. + +#### Custom Extension Overview + +![image](/img/scheduler-extension.jpg) + +#### ExtendedHandle + +ExtendedHandle extends the k8s scheduling framework `Handle` interface to facilitate plugins to access Koordinator's resources and states. +Before constructs the `k8s.io/kubernetes/pkg/scheduler.Scheduler` object, we should build an ExtendedHandle object and pass the object to each custom plugins. + +```go +type ExtendedHandle interface { + framework.Handle + KoordinatorClientSet() koordinatorclientset.Interface + KoordinatorSharedInformerFactory() koordinatorinformers.SharedInformerFactory + SnapshotSharedLister() framework.SharedLister +} +``` + +#### Intercept plugin initialization process + +In order to pass the `ExtendedHandle` object to each custom plugins, we should intercept the plugin initialization process. +And we expect that any customized plugins can be directly and seamlessly integrated into the koordinator scheduler, so the `PluginFactory` of the plugin will not be changed. 
Therefore, we can modify the prototype of `k8s.io/kubernetes/cmd/kube-scheduler/app.Option` and the implementation of `k8s.io/kubernetes/cmd/kube-scheduler/app.WithPlugin` as the follows to get the opportunity to intercept the plugin initialization process. + +When the custom plugin is registered to the out-of registry using `WithPlugin`, it will use `frameworkext.PluginFactoryProxy` to wrap the plugin's original `PluginFactory`. We finally complete the interception of the plugin initialization process in `frameworkext.PluginFactoryProxy`. + +Of course, we will not modify `k8s.io/kubernetes/cmd/kube-scheduler` directly. Considering that the logic of `k8s.io/kubernetes/cmd/kube-scheduler` itself is not complicated, it will basically not bring us additional maintenance costs, so we will copy the relevant code to Koordinator for separate maintenance. + + +```go + +// Option configures a framework.Registry. +type Option func(frameworkext.ExtendedHandle, runtime.Registry) error + +// WithPlugin creates an Option based on plugin name and factory. Please don't remove this function: it is used to register out-of-tree plugins, +// hence there are no references to it from the kubernetes scheduler code base. +func WithPlugin(name string, factory runtime.PluginFactory) Option { + return func(handle frameworkext.ExtendedHandle, registry runtime.Registry) error { + return registry.Register(name, frameworkext.PluginFactoryProxy(handle, factory)) + } +} + +// frameworkext.PluginFactoryProxy +func PluginFactoryProxy(extendHandle ExtendedHandle, factoryFn frameworkruntime.PluginFactory) frameworkruntime.PluginFactory { + return func(args runtime.Object, handle framework.Handle) (framework.Plugin, error) { + impl := extendHandle.(*frameworkExtendedHandleImpl) + impl.once.Do(func() { + impl.Handle = handle + }) + return factoryFn(args, extendHandle) + } +} +``` + +#### Expose the internal state of plugins + +We will define a new extension interface to help the plugin expose the internal state through the Restful API, and provide some built-in Restful APIs to query which APIs are exposed by the current scheduler and some commonly used internal data, such as NodeInfo, etc. + +The new extension interface named `APIServiceProvider`. The plugins can implement this interface to register the API to be exposed as needed. When the plugin is initialized, `frameworkext.PluginFactoryProxy` will check whether the newly constructed plugin implements `APIServiceProvider`, and if so, it will call the `RegisterEndpoints` method of the interface to register the API. The Restful APIs exposed by these plugins will be bound to the URL path `/apis/v1/plugins/` and will be prefixed with the name of each plugin. For example, the API `/availableCPUs/:nodeName` exposed by the plugin `NodeNUMAResource` will be converted to `/apis/v1/plugins/NodeNUMAResource/availableCPUs/:nodeName`. + + +```go +type APIServiceProvider interface { + RegisterEndpoints(group *gin.RouterGroup) +} + +type ErrorMessage struct { + Message string `json:"message,omitempty"` +} + +func ResponseErrorMessage(c *gin.Context, statusCode int, format string, args ...interface{}) { + var e ErrorMessage + e.Message = fmt.Sprintf(format, args...) + c.JSON(statusCode, e) +} +``` + +Users can use the built-in API `/apis/v1/__services__` to query how many Restful APIs are provided by the current scheduler. 
The response is as follows:
+
+```json
+{
+    "GET": [
+        "/apis/v1/__services__",
+        "/apis/v1/nodes/:nodeName",
+        "/apis/v1/plugins/Coscheduling/gang/:namespace/:name",
+        "/apis/v1/plugins/Coscheduling/gangs",
+        "/apis/v1/plugins/NodeNUMAResource/availableCPUs/:nodeName",
+        "/apis/v1/plugins/NodeNUMAResource/cpuTopologyOptions/:nodeName"
+    ]
+}
+```
+
+Koordinator scheduler also provides `/apis/v1/nodes/:nodeName` to expose the internal `NodeInfo` to developers.
+
+#### Support plugin to create Controllers
+
+Similar to Coscheduling/ElasticQuota Scheduling, these scheduling plugins have a matching Controller to synchronize the status of the related CRD. The most common way is to deploy these controllers independently of the scheduler. This method not only brings additional maintenance and resource costs, but also requires the logic in the Controller and the logic in the plugin to be closely coordinated when more plugin state needs to be synchronized to the CRD status. The best way is for the Controller and the scheduling plugin to be in the same process.
+
+We can define a new interface called `ControllerProvider`. When the plugin is initialized, `frameworkext.PluginFactoryProxy` will check whether the newly constructed plugin implements `ControllerProvider`, and if so, it will call the `NewControllers` method of the interface to get the instances of Controllers, and save these instances in the `ExtendedHandle`. When the scheduler gets the leader role, it can trigger the `ExtendedHandle` to start these controllers.
+
+```go
+type ControllerProvider interface {
+    NewControllers() ([]Controller, error)
+}
+
+type Controller interface {
+    Start()
+    Name() string
+}
+```
+
+#### Debug Scoring Result
+
+If we want to support debugging scoring results, the easiest way is to directly modify `Framework.RunScorePlugins` and print the results after scoring. But this goes against the extension principles we laid out earlier. However, we can think about it differently. When `scheduler.Scheduler` executes `scheduleOne`, it obtains an instance of the `framework.Framework` interface from `Profiles` and calls the method `RunScorePlugins`. At the same time, considering that we maintain the scheduler initialization code separately, we can customize the implementation of the `framework.Framework` interface, implement the method `RunScorePlugins`, and take over the `Profiles` in `scheduler.Scheduler`. In this way, our custom `RunScorePlugins` can first call the `RunScorePlugins` method of the original `framework.Framework` instance and then print the result.
+
+For the processing of the results, we can simply print them to the log in markdown format. When needed, the scoring result debugging capability can be enabled through the HTTP interface `/debug/flags/s`, similar to `/debug/flags/v`. Developers can also enable the capability via the flag `--debug-scores`.
+
+```bash
+# print top 100 score results.
+$ curl -X POST schedulerIP:10251/debug/flags/s --data '100' +successfully set debugTopNScores to 100 +``` + +The following are the specific scoring results: + + +``` +| # | Pod | Node | Score | ImageLocality | InterPodAffinity | LoadAwareScheduling | NodeAffinity | NodeNUMAResource | NodeResourcesBalancedAllocation | NodeResourcesFit | PodTopologySpread | Reservation | TaintToleration | +| --- | --- | --- | ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| +| 0 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.51 | 577 | 0 | 0 | 87 | 0 | 0 | 96 | 94 | 200 | 0 | 100 | +| 1 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.50 | 574 | 0 | 0 | 85 | 0 | 0 | 96 | 93 | 200 | 0 | 100 | +| 2 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.19 | 541 | 0 | 0 | 55 | 0 | 0 | 95 | 91 | 200 | 0 | 100 | +| 3 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.18 | 487 | 0 | 0 | 15 | 0 | 0 | 90 | 82 | 200 | 0 | 100 | +``` + +| # | Pod | Node | Score | ImageLocality | InterPodAffinity | LoadAwareScheduling | NodeAffinity | NodeNUMAResource | NodeResourcesBalancedAllocation | NodeResourcesFit | PodTopologySpread | Reservation | TaintToleration | +| --- | --- | --- | ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| ---:| +| 0 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.51 | 577 | 0 | 0 | 87 | 0 | 0 | 96 | 94 | 200 | 0 | 100 | +| 1 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.50 | 574 | 0 | 0 | 85 | 0 | 0 | 96 | 93 | 200 | 0 | 100 | +| 2 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.19 | 541 | 0 | 0 | 55 | 0 | 0 | 95 | 91 | 200 | 0 | 100 | +| 3 | default/curlimage-545745d8f8-rngp7 | cn-hangzhou.10.0.4.18 | 487 | 0 | 0 | 15 | 0 | 0 | 90 | 82 | 200 | 0 | 100 | + + +#### Custom Hook Extend Points to Support Reservation Scheduling + +If we want to schedule the Reservation CRD object in the form of Pod, we need to solve several problems: + +1. Before calling `PreFilter`, check whether the Pod has a matching Reservation. If there is a matching Reservation, and if the Pod is set with `Pod Affinity/AntiAffinity` or `TopologySpreadConstraints`, we need to modify the Pod to remove these fields. The reason is that when the Reservation CRD object is created, the user generally sets these fields, and expects to find suitable nodes to reserve resources according to these scheduling constraints. Therefore, if the Pod is scheduled with the same fields, it will cause the scheduling to fail. To do this, it cannot be achieved by implementing the `PreFilter` extension point, because the scheduler has already obtained the appropriate Pod to start executing when calling `PreFilter`, and we have lost the opportunity to modify the Pod to affect other plugins. +1. In the `Filter` phase, we also need to update the NodeInfo. If there is a Reservation CRD object on NodeInfo, and the current Pod matches the Reservation CRD object, then the resources applied for by the Reservation CRD object should be returned to NodeInfo. Only in this way can it pass the resource check of the scheduler, including the network port check. + +To solve these problems, we define the `Hook` interface. The plugin can be implemented on demand, and the Pod or NodeInfo can be modified when the PreFilter/Filter is executed. Similar to the custom implementation method `RunScorePlugins` mentioned above, we can customize the implementation methods `RunPreFilterPlugins` and `RunFilterPluginsWithNominatedPods`. 
Before executing the real extension point logic, first execute the `Hook` interface and modify the Pod and NodeInfo. + +If necessary, you can modify the Pod or Node before executing the Score Extension Point by implementing ScorePhaseHook. + +Considering that there may be multiple different Hooks to modify the Pod or NodeInfo requirements, when the Hook is called, the Hook will be called cyclically, and the modification result of the previous Hook and the input of the next Hook will continue to be executed. + +Here are some additional explanations for the scenarios in which these new extension points should be used. If you can complete the scheduling function through the extension points such as Filter/Score provided by the K8s Scheduling Framework without modifying the incoming NodeInfo/Pod and other objects, you do not need to use these new extension points. + +```go +type SchedulingPhaseHook interface { + Name() string +} + +type PreFilterPhaseHook interface { + SchedulingPhaseHook + PreFilterHook(handle ExtendedHandle, state *framework.CycleState, pod *corev1.Pod) (*corev1.Pod, bool) +} + +type FilterPhaseHook interface { + SchedulingPhaseHook + FilterHook(handle ExtendedHandle, cycleState *framework.CycleState, pod *corev1.Pod, nodeInfo *framework.NodeInfo) (*corev1.Pod, *framework.NodeInfo, bool) +} + +type ScorePhaseHook interface { + SchedulingPhaseHook + ScoreHook(handle ExtendedHandle, cycleState *framework.CycleState, pod *corev1.Pod, nodes []*corev1.Node) (*corev1.Pod, []*corev1.Node, bool) +} + +``` + +## Alternatives + +### Use Filter instead of Filter Hook + +We can change the order of Filter plugins to support Reservation Scheduling to update NodeInfo earlier, which can replace Filter Hook. Subsequent implementations can be implemented as an optimization. diff --git a/versioned_docs/version-v1.4/designs/fine-grained-cpu-orchestration.md b/versioned_docs/version-v1.4/designs/fine-grained-cpu-orchestration.md new file mode 100644 index 000000000..c250091a5 --- /dev/null +++ b/versioned_docs/version-v1.4/designs/fine-grained-cpu-orchestration.md @@ -0,0 +1,452 @@ +# Fine-grained CPU orchestration + +## Summary + +This proposal defines the fine-grained CPU orchestration for Koordinator QoS in detail, and how to be compatible with the existing design principles and implementations of K8s. This proposal describes the functionality that koordlet, koord-runtime-proxy and koord-scheduler need to enhance. + +## Motivation + +An increasing number of systems leverage a combination of CPUs and hardware accelerators to support latency-critical execution and high-throughput parallel computation. These include workloads in fields such as telecommunications, scientific computing, machine learning, financial services and data analytics. Such hybrid systems comprise a high performance environment. + +In order to extract the best performance, optimizations related to CPU isolation, NUMA-locality are required. + +### Goals + +1. Improve the CPU orchestration semantics of Koordinator QoS. +1. Determine compatible kubelet policies. +1. Clarify how koordlet should enhance CPU scheduling mechanism. +1. Provide a set of API such as CPU bind policies, CPU exclusive policies, NUMA topology alignment policies, NUMA topology information, etc. for applications and cluster administrator to support complex CPU orchestration scenarios. +1. Provide the CPU orchestration optimization API. + +### Non-Goals/Future Work + +1. Describe specific design details of koordlet/koord-runtime-proxy. +1. 
Describe specific design details of the CPU descheduling mechanism.
+
+## Proposal
+
+### Design Overview
+
+![image](/img/cpu-orchestration-seq-uml.svg)
+
+When koordlet starts, it gathers the NUMA topology information from the kubelet, including the NUMA topology, CPU topology, kubelet CPU manager policy, CPUs allocated by the kubelet for Guaranteed Pods, etc., and updates it to the NodeResourceTopology CRD. When latency-sensitive applications scale out, the new Pods can set Koordinator QoS to LSE/LSR together with a CPU bind policy and a CPU exclusive policy to require koord-scheduler to allocate best-fit CPUs for the best performance. When scheduling such a Pod, koord-scheduler filters the Nodes that satisfy the NUMA topology alignment policy, selects the best Node by scoring, allocates the CPUs in the Reserve phase, and records the result in the Pod annotation at PreBind. koordlet hooks the kubelet CRI request and replaces the CPU configuration parameters with the result scheduled by koord-scheduler before the request reaches the runtime, for example when configuring the cgroup.
+
+### User stories
+
+#### Story 1
+
+Compatible with kubelet's existing CPU management policies. The CPU manager policy `static` allows pods with certain resource characteristics to be granted increased CPU affinity and exclusivity in the node. If the `static` policy is enabled, the cluster administrator must configure the kubelet to reserve some CPUs. There are some options for the `static` policy. If the `full-pcpus-only (beta, visible by default)` policy option is specified, the `static` policy will always allocate full physical cores. If the `distribute-cpus-across-numa (alpha, hidden by default)` option is specified, the `static` policy will evenly distribute CPUs across NUMA nodes in cases where more than one NUMA node is required to satisfy the allocation. A configuration sketch for these options is shown after the user stories.
+
+#### Story 2
+
+Similarly, the semantics of the existing K8s Guaranteed Pods in the community should be compatible. The CPU cores allocated to K8s Guaranteed Pods under the `static` policy will not be shared with the default best-effort Pods, so it is equivalent to LSE. But when the load on the node is relatively low, the CPUs allocated to LSR Pods should be shared with best-effort workloads to obtain economic benefits.
+
+#### Story 3
+
+The Topology Manager is a kubelet component that aims to coordinate the set of components that are responsible for these optimizations. After the Topology Manager was introduced, the problem of launching Pods in a cluster where worker nodes have different NUMA topologies and different amounts of resources in those topologies became apparent. A Pod could be scheduled on a node where the total amount of resources is enough, but the resource distribution might not satisfy the appropriate topology policy.
+
+#### Story 4
+
+The scheduler can coordinate the arrangement between latency-sensitive applications. For example, the same latency-sensitive applications can be made mutually exclusive in the CPU dimension, and latency-sensitive applications and general applications can be deployed with affinity in the CPU dimension. In this way costs can be reduced and runtime quality can be guaranteed.
+
+#### Story 5
+
+When allocating CPUs based on the NUMA topology, users want to have different allocation strategies, for example, bin-packing first, or allocating from the most idle NUMA node.
+
+#### Story 6
+
+As applications scale out or roll over, the best-fit allocatable space gradually becomes fragmented, which leads to poor allocation results for some strategies and affects the runtime performance of applications.
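+
+As a reference for Story 1, the kubelet `static` policy and the options mentioned above are configured through the KubeletConfiguration. The following is a minimal sketch only; the reserved CPU range and the option values are illustrative, not a recommendation:
+
+```yaml
+apiVersion: kubelet.config.k8s.io/v1beta1
+kind: KubeletConfiguration
+featureGates:
+  # Required for the alpha policy option below.
+  CPUManagerPolicyAlphaOptions: true
+# The static policy grants integer-CPU Guaranteed Pods exclusive CPUs.
+cpuManagerPolicy: static
+cpuManagerPolicyOptions:
+  full-pcpus-only: "true"
+  # Alpha option, hidden by default.
+  distribute-cpus-across-numa: "true"
+# The static policy requires a non-empty CPU reservation, for example via reservedSystemCPUs.
+reservedSystemCPUs: "0,1"
+```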
+ +## Design Details + +### Basic CPU orchestration principles + +1. Only supports the CPU allocation mechanism of the Pod dimension. +1. Koordinator divides the CPU on the machine into `CPU Shared Pool`, `statically exclusive CPUs` and `BE CPU Shared Pool`. + 1. The `CPU Shared Pool` is the set of CPUs on which any containers in `K8s Burstable` and `Koordinator LS` Pods run. Containers in `K8s Guaranteed` pods with `fractional CPU requests` also run on CPUs in the shared pool. The shared pool contains all unallocated CPUs in the node but excluding CPUs allocated by K8s Guaranteed, LSE and LSR Pods. If kubelet reserved CPUs, the shared pool includes the reserved CPUs. + 1. The `statically exclusive CPUs` are the set of CPUs on which any containers in `K8s Guaranteed`, `Koordinator LSE` and `LSR` Pods that have integer CPU run. When K8s Guaranteed, LSE and LSR Pods request CPU, koord-scheduler will be allocated from the `CPU Shared Pool`. + 1. The `BE CPU Shared pool` is the set of CPUs on which any containers in `K8s BestEffort` and `Koordinator BE` Pods run. The `BE CPU Shared Pool` contains all CPUs in the node but excluding CPUs allocated by `K8s Guaranteed` and `Koordinator LSE` Pods. + +### Koordinator QoS CPU orchestration principles + +1. The Request and Limit of LSE/LSR Pods **MUST** be equal and the CPU value **MUST** be an integer multiple of 1000. +1. The CPUs allocated by the LSE Pod are completely **exclusive** and **MUST NOT** be shared. If the node is hyper-threading architecture, only the logical core dimension is guaranteed to be isolated, but better isolation can be obtained through the `CPUBindPolicyFullPCPUs` policy. +1. The CPUs allocated by the LSR Pod only can be shared with BE Pods. +1. LS Pods bind the CPU shared pool, **excluding** CPUs allocated by LSE/LSR Pods. +1. BE Pods bind all CPUs in the node, **excluding** CPUs allocated by LSE Pods. +1. The K8s Guaranteed Pods already running is equivalent to Koordinator LSR if kubelet enables the CPU manager `static` policy. +1. The K8s Guaranteed Pods already running is equivalent to Koordinator LS if kubelet enables the CPU manager `none` policy. +1. Newly created K8s Guaranteed Pod without Koordinator QoS specified will be treated as LS. + +![img](/img/qos-cpu-orchestration.png) + +### Compatible kubelet CPU management policies + +1. If kubelet set the CPU manager policy options `full-pcpus-only=true` / `distribute-cpus-across-numa=true`, and there is no new CPU bind policy defined by Koordinator in the node, follow the semantics of these parameters defined by the kubelet. +1. If kubelet set the Topology manager policy, and there is no new NUMA Topology Alignment policy defined by Koordinator in the node, follow the semantics of these parameters defined by the kubelet. + +### Take over kubelet CPU management policies + +Because the CPU reserved by kubelet mainly serves K8s BestEffort and Burstable Pods. But Koordinator will not follow the policy. The K8s Burstable Pods should use the CPU Shared Pool, and the K8s BestEffort Pods should use the `BE CPU Shared Pool`. + +1. For K8s Burstable and Koordinator LS Pods: + 1. When the koordlet starts, calculates the `CPU Shared Pool` and applies the shared pool to all Burstable and LS Pods in the node, that is, updating their cgroups to set cpuset. The same logic is executed when LSE/LSR Pods are creating or destroying. + 1. koordlet ignore the CPUs reserved by kubelet, and replace them with CPU Shared Pool defined by Koordinator. +1. 
For K8s BestEffort and Koordinator BE Pods: + 1. If kubelet reserved CPUs, the best effort Pods use the reserved CPUs first. + 1. koordlet can use all CPUs in the node but exclude the CPUs allocated by K8s Guaranteed and Koordinator LSE Pods that have integer CPU. It means that if koordlet enables the CPU Suppress feature should follow the constraint to guarantee not affecting LSE Pods. Similarly, if kubelet enables the CPU manager policy with `static`, the K8s Guaranteed Pods should also be excluded. +1. For K8s Guaranteed Pods: + 1. If there is `scheduling.koordinator.sh/resource-status` updated by koord-scheduler in the Pod annotation, then replace the CPUSet in the kubelet CRI request, including Sandbox/Container creating stage. + 1. kubelet sometimes call `Update` method defined in CRI to update container cgroup to set new CPUs, so koordlet and koord-runtime-proxy need to hook the method. +1. Automatically resize CPU Shared Pool + 1. koordlet automatically resize `CPU Shared Pool` based on the changes such as Pod creating/destroying. If `CPU Shared Pool` changed, koordlet should update cgroups of all LS/K8s Burstable Pods with the CPUs of shared pool. + 1. If the corresponding `CPU Shared Pool` is specified in the annotation `scheduling.koordinator.sh/resource-status` of the Pod, koordlet need to bind only the CPUs of the corresponding pool when configuring the cgroup. + +The takeover logic will require koord-runtime-proxy to add new extension points, and require koordlet to implement a new runtime hook plugin. When koord-runtime-proxy is not installed, these takeover logic will also be able to be implemented. + +## CPU orchestration API + +### Application CPU CPU orchestration API + +#### Resource spec + +The annotation `scheduling.koordinator.sh/resource-spec` is a resource allocation API defined by Koordinator. The user specifies the desired CPU orchestration policy by setting the annotation. In the future, we can also extend and add resource types that need to be supported as needed. The scheme corresponding to the annotation value is defined as follows: + +```go +// ResourceSpec describes extra attributes of the compute resource requirements. 
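+//
+// An illustrative annotation value that serializes this spec (matching the example
+// later in this document) is:
+//
+//	scheduling.koordinator.sh/resource-spec: '{"preferredCPUBindPolicy": "SpreadByPCPUs", "preferredCPUExclusivePolicy": "PCPULevel"}'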
+type ResourceSpec struct { + PreferredCPUBindPolicy CPUBindPolicy `json:"preferredCPUBindPolicy,omitempty"` + PreferredCPUExclusivePolicy CPUExclusivePolicy `json:"preferredCPUExclusivePolicy,omitempty"` +} + +type CPUBindPolicy string + +const ( + // CPUBindPolicyDefault performs the default bind policy that specified in koord-scheduler configuration + CPUBindPolicyDefault CPUBindPolicy = "Default" + // CPUBindPolicyFullPCPUs favor cpuset allocation that pack in few physical cores + CPUBindPolicyFullPCPUs CPUBindPolicy = "FullPCPUs" + // CPUBindPolicySpreadByPCPUs favor cpuset allocation that evenly allocate logical cpus across physical cores + CPUBindPolicySpreadByPCPUs CPUBindPolicy = "SpreadByPCPUs" + // CPUBindPolicyConstrainedBurst constrains the CPU Shared Pool range of the Burstable Pod + CPUBindPolicyConstrainedBurst CPUBindPolicy = "ConstrainedBurst" +) + +type CPUExclusivePolicy string + +const ( + // CPUExclusivePolicyDefault performs the default exclusive policy that specified in koord-scheduler configuration + CPUExclusivePolicyDefault CPUExclusivePolicy = "Default" + // CPUExclusivePolicyPCPULevel represents mutual exclusion in the physical core dimension + CPUExclusivePolicyPCPULevel CPUExclusivePolicy = "PCPULevel" + // CPUExclusivePolicyNUMANodeLevel indicates mutual exclusion in the NUMA topology dimension + CPUExclusivePolicyNUMANodeLevel CPUExclusivePolicy = "NUMANodeLevel" +) +``` + +- The `CPUBindPolicy` defines the CPU binding policy. The specific values are defined as follows: + - `CPUBindPolicyDefault` or empty value performs the default bind policy that specified in koord-scheduler configuration. + - `CPUBindPolicyFullPCPUs` is a bin-packing policy, similar to the `full-pcpus-only=true` option defined by the kubelet, that allocate full physical cores. However, if the number of remaining logical CPUs in the node is sufficient but the number of full physical cores is insufficient, the allocation will continue. This policy can effectively avoid the noisy neighbor problem. + - `CPUBindPolicySpreadByPCPUs` is a spread policy. If the node enabled Hyper-Threading, when this policy is adopted, the scheduler will evenly allocate logical CPUs across physical cores. For example, the current node has 8 physical cores and 16 logical CPUs. When a Pod requires 8 logical CPUs and the `CPUBindPolicySpreadByPCPUs` policy is adopted, the scheduler will allocate an logical CPU from each physical core. This policy is mainly used by some latency-sensitive applications with multiple different peak-to-valley characteristics. It can not only allow the application to fully use the CPU at certain times, but will not be disturbed by the application on the same physical core. So the noisy neighbor problem may arise when using this policy. + - `CPUBindPolicyConstrainedBurst` a special policy that mainly helps K8s Burstable/Koordinator LS Pod get better performance. When using the policy, koord-scheduler is filtering out Nodes that have NUMA Nodes with suitable CPU Shared Pool by Pod Limit. After the scheduling is successful, the scheduler will update `scheduling.koordinator.sh/resource-status` in the Pod, declaring the `CPU Shared Pool` to be bound. 
The koordlet binds the CPU Shared Pool of the corresponding NUMA Node according to the `CPU Shared Pool` + - If `kubelet.koordinator.sh/cpu-manager-policy` in `NodeResourceTopology` has option `full-pcpus-only=true`, or `node.koordinator.sh/cpu-bind-policy` in the Node with the value `PCPUOnly`, the koord-scheduler will check whether the number of CPU requests of the Pod meets the `SMT-alignment` requirements, so as to avoid being rejected by the kubelet after scheduling. koord-scheduler will avoid such nodes if the Pod uses the `CPUBindPolicySpreadByPCPUs` policy or the number of logical CPUs mapped to the number of physical cores is not an integer. +- The `CPUExclusivePolicy` defines the CPU exclusive policy, it can help users to avoid noisy neighbor problems. The specific values are defined as follows: + - `CPUExclusivePolicyDefault` or empty value performs the default exclusive policy that specified in koord-scheduler configuration. + - `CPUExclusivePolicyPCPULevel`. When allocating logical CPUs, try to avoid physical cores that have already been applied for by the same exclusive policy. It is a supplement to the `CPUBindPolicySpreadByPCPUs` policy. + - `CPUExclusivePolicyNUMANodeLevel`. When allocating logical CPUs, try to avoid NUMA Nodes that has already been applied for by the same exclusive policy. If there is no NUMA Node that satisfies the policy, downgrade to `CPUExclusivePolicyPCPULevel` policy. + +For the ARM architecture, `CPUBindPolicy` only support `CPUBindPolicyFullPCPUs`, and `CPUExclusivePolicy` only support `CPUExclusivePolicyNUMANodeLevel`. + +#### Resource status + +The annotation `scheduling.koordinator.sh/resource-status` represents resource allocation result. koord-scheduler patch Pod with the annotation before binding to node. koordlet uses the result to configure cgroup. + +The scheme corresponding to the annotation value is defined as follows: + +```go +type ResourceStatus struct { + CPUSet string `json:"cpuset,omitempty"` + CPUSharedPools []CPUSharedPool `json:"cpuSharedPools,omitempty"` +} +``` + +- `CPUSet` represents the allocated CPUs. When LSE/LSR Pod requested, koord-scheduler will update the field. It is Linux CPU list formatted string. For more details, please refer to [doc](http://man7.org/linux/man-pages/man7/cpuset.7.html#FORMATS). +- `CPUSharedPools` represents the desired CPU Shared Pools used by LS Pods. If the Node has the label `node.koordinator.sh/numa-topology-alignment-policy` with `Restricted/SingleNUMANode`, koord-scheduler will find the best-fit NUMA Node for the LS Pod, and update the field that requires koordlet uses the specified CPU Shared Pool. It should be noted that the scheduler does not update the `CPUSet` field in the `CPUSharedPool`, koordlet binds the CPU Shared Pool of the corresponding NUMA Node according to the `Socket` and `Node` fields in the `CPUSharedPool`. + +#### Example + +The following specific example: + +```yaml +apiVersion: v1 +kind: Pod +metadata: + annotations: + scheduling.koordinator.sh/resource-spec: |- + { + "preferredCPUBindPolicy": "SpreadByPCPUs", + "preferredCPUExclusivePolicy": "PCPULevel" + } + scheduling.koordinator.sh/resource-status: |- + { + "cpuset": "0-3" + } + name: test-pod + namespace: default +spec: + ... +``` + +### Node CPU orchestration API + +From the perspective of cluster administrators, it is necessary to support some APIs to control the CPU orchestration behavior of nodes. 
+ +#### CPU bind policy + +The label `node.koordinator.sh/cpu-bind-policy` constrains how to bind CPU logical CPUs when scheduling. + +The following is the specific value definition: +- `None` or empty value does not perform any policy. +- `FullPCPUsOnly` requires that the scheduler must allocate full physical cores. Equivalent to kubelet CPU manager policy option `full-pcpus-only=true`. +- `SpreadByPCPUs` requires that the schedler must evenly allocate logical CPUs across physical cores. + +If there is no `node.koordinator.sh/cpu-bind-policy` in the node's label, it will be executed according to the policy configured by the Pod or koord-scheduler. + +#### NUMA allocate strategy + +The label `node.koordinator.sh/numa-allocate-strategy` indicates how to choose satisfied NUMA Nodes when scheduling. The following is the specific value definition: +- `MostAllocated` indicates that allocates from the NUMA Node with the least amount of available resource. +- `LeastAllocated` indicates that allocates from the NUMA Node with the most amount of available resource. +- `DistributeEvenly` indicates that evenly distribute CPUs across NUMA Nodes. + +If the cluster administrator does not set label `node.koordinator.sh/numa-allocate-strategy` on Node, but `kubelet.koordinator.sh/cpu-manager-policy` in `NodeResourceTopology` has option `distribute-cpus-across-numa=true`, then follow the semantic allocation of `distribute-cpus-across-numa`. + +If there is no `node.koordinator.sh/numa-allocate-strategy` in the node's label and no `kubelet.koordinator.sh/cpu-manager-policy` with `distribute-cpus-across-numa` option in `NodeResourceTopology`, it will be executed according to the policy configured by the koord-scheduler. + +If both `node.koordinator.sh/numa-allocate-strategy` and `kubelet.koordinator.sh/cpu-manager-policy` are defined, `node.koordinator.sh/numa-allocate-strategy` is used first. + +#### NUMA topology alignment policy + +The label `node.koordinator.sh/numa-topology-alignment-policy` represents that how to aligning resource allocation according to the NUMA topology. The policy semantic follow the K8s community. Equivalent to the field `TopologyPolicies` in `NodeResourceTopology`, and the topology policies `SingleNUMANodePodLevel` and `SingleNUMANodeContainerLevel` are mapping to `SingleNUMANode` policy. + +- `None` is the default policy and does not perform any topology alignment. +- `BestEffort` indicates that preferred select NUMA Node that is topology alignment, and if not, continue to allocate resources to Pods. +- `Restricted` indicates that each resource requested by a Pod on the NUMA Node that is topology alignment, and if not, koord-scheduler will skip the node when scheduling. +- `SingleNUMANode` indicates that all resources requested by a Pod must be on the same NUMA Node, and if not, koord-scheduler will skip the node when scheduling. + +If there is no `node.koordinator.sh/numa-topology-alignment-policy` in the node's label and `TopologyPolicies=None` in `NodeResourceTopology`, it will be executed according to the policy configured by the koord-scheduler. + +If both `node.koordinator.sh/numa-topology-alignment-policy` in Node and `TopologyPolicies=None` in `NodeResourceTopology` are defined, `node.koordinator.sh/numa-topology-alignment-policy` is used first. 
+ +#### Example + +The following specific example: + +```yaml +apiVersion: v1 +kind: Node +metadata: + labels: + node.koordinator.sh/cpu-bind-policy: "FullPCPUsOnly" + node.koordinator.sh/numa-topology-alignment-policy: "BestEffort" + node.koordinator.sh/numa-allocate-strategy: "MostAllocated" + name: node-0 +spec: + ... +``` + +### NodeResourceTopology CRD + +The node resource information to be reported mainly includes the following categories: + +- NUMA Topology, including resources information, CPU information such as logical CPU ID, physical Core ID, NUMA Socket ID and NUMA Node ID and etc. +- The topology manager scopes and policies configured by kubelet. +- The CPU manager policies and options configured by kubelet. +- Pod bound CPUs allocated by kubelet or koord-scheduler, including K8s Guaranteed Pods, Koordinator LSE/LSR Pods but except the LS/BE. +- CPU Shared Pool defined by koordlet + +The above information can guide koord-scheduler to better be compatible with the kubelet's CPU management logic, make more appropriate scheduling decisions and help users quickly troubleshoot. + +#### CRD Scheme definition + +We use [NodeResourceTopology](https://github.com/k8stopologyawareschedwg/noderesourcetopology-api/blob/master/pkg/apis/topology/v1alpha1/types.go) CRD to describe the NUMA Topology. The community-defined NodeResourceTopology CRD is mainly used for the following considerations: + +- NodeResourceTopology already contains basic NUMA topology information and kubelet TopologyManager's Scope and Policies information. We can reuse the existing codes. +- Keep up with the evolution of the community and influence the community to make more changes. + +#### Compatible + +The koordlet creates or updates NodeResourceTopology periodically. The name of NodeResourceTopology is same as the name of Node. and add the label `app.kubernetes.io/managed-by=Koordinator` describes the node is managed by Koordinator. + +#### Extension + +At present, the NodeResourceTopology lacks some information, and it is temporarily written in the NodeResourceTopology in the form of annotations or labels: + +- The annotation `kubelet.koordinator.sh/cpu-manager-policy` describes the kubelet CPU manager policy and options. The scheme is defined as follows: + +```go +const ( + FullPCPUsOnlyOption string = "full-pcpus-only" + DistributeCPUsAcrossNUMAOption string = "distribute-cpus-across-numa" +) + +type KubeletCPUManagerPolicy struct { + Policy string `json:"policy,omitempty"` + Options map[string]string `json:"options,omitempty"` + ReservedCPUs string `json:"reservedCPUs,omitempty"` +} + +``` + +- The annotation `node.koordinator.sh/cpu-topology` describes the detailed CPU topology. Fine-grained management mechanism needs more detailed CPU topology information. The scheme is defined as follows: + +```go +type CPUTopology struct { + Detail []CPUInfo `json:"detail,omitempty"` +} + +type CPUInfo struct { + ID int32 `json:"id"` + Core int32 `json:"core"` + Socket int32 `json:"socket"` + Node int32 `json:"node"` +} +``` + +- annotation `node.koordinator.sh/pod-cpu-allocs` describes the CPUs allocated by Koordinator LSE/LSR and K8s Guaranteed Pods. 
The scheme corresponding to the annotation value is defined as follows: + +```go +type PodCPUAlloc struct { + Namespace string `json:"namespace,omitempty"` + Name string `json:"name,omitempty"` + UID types.UID `json:"uid,omitempty"` + CPUSet string `json:"cpuset,omitempty"` + ManagedByKubelet bool `json:"managedByKubelet,omitempty"` +} + +type PodCPUAllocs []PodCPUAlloc +``` + +- The annotation `node.koordinator.sh/cpu-shared-pools` describes the CPU Shared Pool defined by Koordinator. The shared pool is mainly used by Koordinator LS Pods or K8s Burstable Pods. The scheme is defined as follows: + +```go +type NUMACPUSharedPools []CPUSharedPool + +type CPUSharedPool struct { + Socket int32 `json:"socket"` + Node int32 `json:"node"` + CPUSet string `json:"cpuset,omitempty"` +} +``` +The `CPUSet` field is Linux CPU list formatted string. For more details, please refer to [doc](http://man7.org/linux/man-pages/man7/cpuset.7.html#FORMATS). + + +#### Create/Update NodeResourceTopology + +- koordlet is responsible for creating/updating NodeResourceTopology +- It is recommended that koordlet obtain the CPU allocation information of the existing K8s Guaranteed Pod by parsing the CPU state checkpoint file. Or obtain this information through the CRI interface and gRPC provided by kubelet. +- When the CPU of the Pod is allocated by koord-scheduler, replace the CPUs in the kubelet state checkpoint file. +- It is recommended that koordlet obtain the CPU manager policy and options from [kubeletConfiguration](https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/). + +#### Example + +A complete NodeResourceTopology example: + +```yaml +apiVersion: topology.node.k8s.io/v1alpha1 +kind: NodeResourceTopology +metadata: + annotations: + kubelet.koordinator.sh/cpu-manager-policy: |- + { + "policy": "static", + "options": { + "full-pcpus-only": "true", + "distribute-cpus-across-numa": "true" + } + } + node.koordinator.sh/cpu-topology: |- + { + "detail": [ + { + "id": 0, + "core": 0, + "socket": 0, + "node": 0 + }, + { + "id": 1, + "core": 1, + "socket": 1, + "node": 1 + } + ] + } + node.koordinator.sh/cpu-shared-pools: |- + [ + { + "socket": 0, + "node": 0, + "cpuset": "0-3" + } + ] + node.koordinator.sh/pod-cpu-allocs: |- + [ + { + "namespace": "default", + "name": "static-guaranteed-pod", + "uid": "32b14702-2efe-4be9-a9da-f3b779175846", + "cpu": "4-8", + "managedByKubelet": "true" + } + ] + labels: + app.kubernetes.io/managed-by: Koordinator + name: node1 +topologyPolicies: ["SingleNUMANodePodLevel"] +zones: + - name: node-0 + type: Node + resources: + - name: cpu + capacity: 20 + allocatable: 15 + available: 10 + - name: vendor/nic1 + capacity: 3 + allocatable: 3 + available: 3 + - name: node-1 + type: Node + resources: + - name: cpu + capacity: 30 + allocatable: 25 + available: 15 + - name: vendor/nic2 + capacity: 6 + allocatable: 6 + available: 6 + - name: node-2 + type: Node + resources: + - name: cpu + capacity: 30 + allocatable: 25 + available: 15 + - name: vendor/nic1 + capacity: 3 + allocatable: 3 + available: 3 + - name: node-3 + type: Node + resources: + - name: cpu + capacity: 30 + allocatable: 25 + available: 15 + - name: vendor/nic1 + capacity: 3 + allocatable: 3 + available: 3 +``` \ No newline at end of file diff --git a/versioned_docs/version-v1.4/designs/fine-grained-device-scheduling.md b/versioned_docs/version-v1.4/designs/fine-grained-device-scheduling.md new file mode 100644 index 000000000..e27e8a951 --- /dev/null +++ 
b/versioned_docs/version-v1.4/designs/fine-grained-device-scheduling.md
@@ -0,0 +1,408 @@
+# Fine-grained device scheduling
+
+## Summary
+
+This proposal provides a fine-grained mechanism for managing GPUs and other devices such as RDMA and FPGA. It defines a set of APIs to describe device information on nodes, including GPU, RDMA, and FPGA, and a new set of resource names that allow users to request GPU resources at a finer granularity. This mechanism is the basis for subsequent GPU scheduling capabilities such as GPU Share, GPU Overcommitment, etc.
+
+## Motivation
+
+GPU devices have very strong computing power, but are expensive. How to make better use of GPU devices, fully exploit their value and reduce costs is a problem that needs to be solved. In the existing GPU allocation mechanism of the K8s community, the GPU is allocated by the kubelet, and only whole devices can be allocated. This method is simple and reliable, but, similar to CPU and memory, GPU resources are also wasted. Therefore, some users expect to use only a portion of a GPU's resources and share the rest with other workloads to save costs. Moreover, GPUs have particularities. For example, the NVLink and overselling scenarios supported by NVIDIA GPUs mentioned below both require a central decision made by the scheduler to obtain globally optimal allocation results.
+
+![image](/img/nvlink.jpg)
+
+From the picture, we can see that although the node has 8 GPU instances whose model is A100/V100, the data transmission speed between GPU instances differs. When a Pod requires multiple GPU instances, we can assign the Pod the combination of GPU instances with the maximum data transfer speed. In addition, when we want the GPU instances of a group of Pods to have the maximum data transfer speed between them, the scheduler should allocate the best GPU instances to these Pods in a batch and assign them to the same node.
+
+### Goals
+
+1. Define the Device CRD and the Resource API.
+1. Provide a reporter component in koordlet to report Device information and resource capacities.
+1. Provide a scheduler plugin to support users requesting GPU resources at a finer granularity.
+1. Provide a new runtime hook plugin in koordlet to update the environments of containers with the GPUs allocated by the scheduler.
+
+### Non-goals/Future work
+
+1. Define flexible allocation strategies, such as implementing BinPacking or Spread according to GPU resources.
+
+## Proposal
+
+### API
+
+#### Device resource dimensions
+
+Since the GPU is the most complicated device, we introduce it first. As we all know, a GPU device provides both compute and GPU memory capacity. Generally users request GPUs like "I want 1/2/4/8 GPUs", but if the node supports a GPU-level isolation mechanism, users may request GPUs like "I want 0.5/0.25 GPU resources". Moreover, users may set different compute and GPU memory capacities for best resource utilization, so they want to request GPUs like "I want X percent of compute capability and Y percent of memory capability".
+
+We abstract GPU resources into different dimensions:
+
+- `kubernetes.io/gpu-core` represents the computing capacity of the GPU. Similar to K8s MilliCPU, we abstract the total computing power of a GPU into one hundred, and users can apply for the corresponding amount of GPU computing power according to their needs.
+- `kubernetes.io/gpu-memory` represents the memory capacity of the GPU in bytes.
+- `kubernetes.io/gpu-memory-ratio` represents the percentage of the GPU's memory.
+
+Assuming that node A has 4 GPU instances and the total memory of each instance is 8GB, when the device reporter reports GPU capacity information to `Node.Status.Allocatable`, it no longer reports nvidia.com/gpu=4, but reports the following information:
+
+```yaml
+status:
+  capacity:
+    kubernetes.io/gpu-core: 400
+    kubernetes.io/gpu-memory: "32GB"
+    kubernetes.io/gpu-memory-ratio: 400
+  allocatable:
+    kubernetes.io/gpu-core: 400
+    kubernetes.io/gpu-memory: "32GB"
+    kubernetes.io/gpu-memory-ratio: 400
+```
+
+For the convenience of users, an independent resource name `kubernetes.io/gpu` is defined. For example, when a user wants to use half of the computing and memory resources of a GPU instance, the user can directly declare `kubernetes.io/gpu: 50`, and the scheduler will convert it to `kubernetes.io/gpu-core: 50, kubernetes.io/gpu-memory-ratio: 50`.
+
+For other devices like RDMA and FPGA, if the node has 1 RDMA device and 1 FPGA device, it will report the following information:
+
+```yaml
+status:
+  capacity:
+    kubernetes.io/rdma: 100
+    kubernetes.io/fpga: 100
+  allocatable:
+    kubernetes.io/rdma: 100
+    kubernetes.io/fpga: 100
+```
+
+Why do we need both `kubernetes.io/gpu-memory-ratio` and `kubernetes.io/gpu-memory`?
+When a user applies for 0.5/0.25 GPU, the user doesn't know the exact total memory bytes per GPU and only wants to use
+half or a quarter of the memory, so the user can request the GPU memory with `kubernetes.io/gpu-memory-ratio`.
+When the scheduler assigns the Pod to a concrete node, it translates `kubernetes.io/gpu-memory-ratio` into `kubernetes.io/gpu-memory` by the formula: ***allocatedMemory = totalMemoryOf(GPU) * `kubernetes.io/gpu-memory-ratio` / 100***, so that GPU isolation can work.
+
+During the scheduling filter phase, the scheduler will do special processing for `kubernetes.io/gpu-memory` and `kubernetes.io/gpu-memory-ratio`. When a Pod specifies `kubernetes.io/gpu-memory-ratio`, the scheduler checks the unallocated or remaining resources of each GPU instance on each node to ensure that the remaining memory on each GPU instance meets the ratio requirement.
+
+If the user knows exactly, or can roughly estimate, the specific memory consumption of the workload, they can apply for GPU memory through `kubernetes.io/gpu-memory`. All details can be seen below.
+
+Besides, when a dimension's value is greater than 100, it means the Pod needs multiple devices. Currently, such values must be divisible by 100.
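+
+As an illustration of the formula above, here is a minimal sketch of the ratio-to-bytes translation; the function name and signature are hypothetical and not part of the scheduler's API:
+
+```go
+package main
+
+import "fmt"
+
+// translateMemoryRatio converts a requested kubernetes.io/gpu-memory-ratio
+// (on a scale where 100 means one whole GPU) into concrete bytes of
+// kubernetes.io/gpu-memory for a GPU with the given total memory.
+func translateMemoryRatio(totalMemoryBytes, memoryRatio int64) int64 {
+    return totalMemoryBytes * memoryRatio / 100
+}
+
+func main() {
+    const gpuTotal = int64(8) * 1024 * 1024 * 1024 // assume an 8GB GPU, as in the example above
+    fmt.Println(translateMemoryRatio(gpuTotal, 50)) // half of the GPU memory, i.e. 4GB in bytes
+}
+```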
+ +#### User apply device resources scenarios + +##### Compatible with `nvidia.com/gpu` + +```yaml +resources: + requests: + nvidia.com/gpu: "2" + cpu: "4" + memory: "8Gi" +``` + +The scheduler translates the `nvida.com/gpu: 2` to the following spec: + +```yaml +resources: + requests: + kubernetes.io/gpu-core: "200" + kubernetes.io/gpu-memory-ratio: "200" + kubernetes.io/gpu-memory: "16Gi" # assume 8G memory in bytes per GPU + cpu: "4" + memory: "8Gi" +``` + +##### Apply whole resources of GPU or part resources of GPU + +```yaml +resources: + requests: + kubernetes.io/gpu: "50" + cpu: "4" + memory: "8Gi" +``` + +The scheduler translates the `kubernetes.io/gpu: "50"` to the following spec: + +```yaml +resources: + requests: + kubernetes.io/gpu-core: "50" + kubernetes.io/gpu-memory-ratio: "50" + kubernetes.io/gpu-memory: "4Gi" # assume 8G memory in bytes for the GPU + cpu: "4" + memory: "8Gi" +``` + +##### Apply `kubernetes.io/gpu-core` and `kubernetes.io/gpu-memory-ratio` separately + +```yaml +resources: + requests: + kubernetes.io/gpu-core: "50" + kubernetes.io/gpu-memory-ratio: "60" + cpu: "4" + memory: "8Gi" +``` + +##### Apply `kubernetes.io/gpu-core` and `kubernetes.io/gpu-memory` separately + +```yaml +resources: + requests: + kubernetes.io/gpu-core: "60" + kubernetes.io/gpu-memory: "4Gi" + cpu: "4" + memory: "8Gi" +``` + +##### Apply RDMA + +```yaml +resources: + requests: + kubernetes.io/rdma: "100" + cpu: "4" + memory: "8Gi" +``` + +### Implementation Details + +#### Scheduling + +1. Abstract new data structure to describe resources and healthy status per device on the node. +2. Implements the Filter/Reserve/PreBind extenstion points. +3. Automatically recognize different kind devices. When a new device added, we don't need modify any code + +##### DeviceAllocation + +In the PreBind stage, the scheduler will update the device (including GPU) allocation results, including the device's Minor and resource allocation information, to the Pod in the form of annotations. + +```go +/* +{ + "gpu": [ + { + "minor": 0, + "resouurces": { + "kubernetes.io/gpu-core": 100, + "kubernetes.io/gpu-mem-ratio": 100, + "kubernetes.io/gpu-mem": "16Gi" + } + }, + { + "minor": 1, + "resouurces": { + "kubernetes.io/gpu-core": 100, + "kubernetes.io/gpu-mem-ratio": 100, + "kubernetes.io/gpu-mem": "16Gi" + } + } + ] +} +*/ +type DeviceAllocation struct { + Minor int32 + Resources map[string]resource.Quantity +} + +type DeviceAllocations map[DeviceType][]*DeviceAllocation +``` + +##### NodeDevicePlugin + +```go +var ( + _ framework.PreFilterPlugin = &NodeDevicePlugin{} + _ framework.FilterPlugin = &NodeDevicePlugin{} + _ framework.ReservePlugin = &NodeDevicePlugin{} + _ framework.PreBindPlugin = &NodeDevicePlugin{} +) + +type NodeDevicePlugin struct { + frameworkHandler framework.Handle + nodeDeviceCache *NodeDeviceCache +} + +type NodeDeviceCache struct { + lock sync.Mutex + nodeDevices map[string]*nodeDevice +} + +type nodeDevice struct { + lock sync.Mutex + DeviceTotal map[DeviceType]deviceResource + DeviceFree map[DeviceType]deviceResource + DeviceUsed map[DeviceType]deviceResource + AllocateSet map[DeviceType]*corev1.PodList +} + +// We use `deviceResource` to present resources per device. 
+// "0": {kubernetes.io/gpu-core:100, kubernetes.io/gpu-memory-ratio:100, kubernetes.io/gpu-memory: 16GB} +// "1": {kubernetes.io/gpu-core:100, kubernetes.io/gpu-memory-ratio:100, kubernetes.io/gpu-memory: 16GB} +type deviceResources map[int]corev1.ResourceList + +``` + +We will register node and device event handler to maintain device account. + +- In Filter, we will make-up each device request by a node(the gpu-memory example), and try compare each device free resource and Pod device request. +- In Reserve/Unreserve, we will update nodeDeviceCache's used/free resource and allocateSet. Now device selection rule just based on device minor id order. +- In PreBind, we will write DeviceAllocations to Pod's annotation. +- In Init stage, we should list all Node/Device/Pods to recover device accounts. + +#### Device Reporter + +Implements a new component called `Device Reporter` in koordlet to create or update `Device` CRD instance with the resources information and healthy status per device including GPU, RDMA and FPGA, etc. This version we only support GPU. It will execution `nccl` commands to get each minor resource just like k8s-gpu-device-plugins. We will apply community health check logic. + +#### Device CRD Scheme definition +```go +type DeviceType string + +const ( + GPU DeviceType = "gpu" + FPGA DeviceType = "fpga" + RDMA DeviceType = "rdma" +) + +type DeviceSpec struct { + Devices []DeviceInfo `json:"devices"` +} + +type DeviceInfo struct { + // UUID represents the UUID of device + UUID string `json:"id,omitempty"` + // Minor represents the Minor number of Device, starting from 0 + Minor int32 `json:"minor,omitempty"` + // Type represents the type of device + Type DeviceType `json:"deviceType,omitempty"` + // Health indicates whether the device is normal + Health bool `json:"health,omitempty"` + // Resources represents the total capacity of various resources of the device + Resources map[string]resource.Quantity `json:"resource,omitempty"` +} + +type DeviceStatus struct {} + +type Device struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec DeviceSpec `json:"spec,omitempty"` + Status DeviceStatus `json:"status,omitempty"` +} + +type DeviceList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + + Items []Device `json:"items"` +} +``` + +##### Compatible + +Considering that some users already have many existing GPU Pods in their clusters, it is necessary to ensure that Koordinator GPU Scheduling does not repeatedly allocate the GPU devices held by these GPU Pods. Therefore, koord-scheduler needs to obtain the GPU devices's information held by these existing Pods. These GPU devices are allocated by the kubelet and recorded in the local file `/var/lib/kubelet/device-plugins/kubelet_internal_checkpoint`, so the device reporter will parse the file to obtain the GPU Device ID assigned to each Pod. When parsing, it needs to exclude the Pod that allocates GPU through koord-scheduler, and finally update it to Device CRD in the form of annotation. 
The corresponding annotation key is `node.koordinator.sh/devices-checkpoints`, and the annotation value is defined as follows: + +```go +type PodDevicesEntry struct { + PodUID string `json:"podUID,omitempty"` + ContainerName string `json:"containerName,omitempty"` + ResourceName string `json:"resourceName,omitempty"` + DeviceIDs []string `json:"deviceIDs,omitempty"` + AllocResp []byte `json:"allocResp,omitempty"` +} + +type PodDevicesEntries []PodDevicesEntry +``` + +#### CRD Example +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: Device +metadata: + name: node-1 + annotations: + node.koordinator.sh/gpu-checkpoints: |- + [ + { + "podUID": "fa8983dc-bb76-4eeb-8dcc-556fbd44d7ce", + "containerName": "cuda-container", + "resourceName": "nvidia.com/gpu", + "deviceIDs": ["GPU-36b27e44-b086-46f7-f2dc-73c36dc65991"] + } + ] +spec: + devices: + - health: true + id: GPU-98583a5c-c155-9cf6-f955-03c189d3dbfb + minor: 0 + resources: + kubernetes.io/gpu-core: "100" + kubernetes.io/gpu-memory: 15472384Ki + kubernetes.io/gpu-memory-ratio: "100" + type: gpu + - health: true + id: GPU-7f6410b9-bdf7-f9a5-de09-aa5ec31a7124 + minor: 1 + resources: + kubernetes.io/gpu-core: "100" + kubernetes.io/gpu-memory: 15472384Ki + kubernetes.io/gpu-memory-ratio: "100" + type: gpu +status: {} +``` + +#### koordlet and koord-runtime-proxy + +Our target is to work compatible with origin k8s kubelet and k8s device plugins, so: + +1. We still allow kubelet and device plugin to allocate concrete device, which means no matter there's a k8s device +plugin or not, our design can work well. + +2. In koord-runtime-proxy, we will use Pod's `DeviceAllocation` in annotation to replace the step1's result of container's +args and envs. + +We should modify protocol between koord-runtime-proxy and koordlet to add container env: + +```go +type ContainerResourceHookRequest struct { + .... + Env map[string]string +} + +type ContainerResourceHookResponse struct { + .... + Env map[string]string +} +``` + +Then we will add a new `gpu-hook` in koordlet's runtimehooks, registered to `PreCreateContainer` stage. +We will generate new GPU env `NVIDIA_VISIBLE_DEVICES` by Pod GPU allocation result in annotation. + +The koord-runtime-proxy can see these Pod's env, we need koord-runtime-proxy to pass these environments to koordlet, and koordlet parse the GPU related env to find the concrete device ids. + +Besides, the koordlet should report GPU model to node labels same as device plugin, this is in-case Koordinator working without device-plugin. + +Finally, we should modify `ContainerResourceExecutor`'s `UpdateRequest` function in koord-runtime-proxy, and let new GPU env covering old GPU env. + +When we handle hot-update processing, we can handle the existing scheduled Pods without device allocation in Pod's annotation. If GPU allocation info is not in annotation, we will find the GPU allocations from `ContainerResourceHookRequest`'s `Env`, and we will update all GPU allocations to Device CRD instance. + +### Compatibility + +As we know, the GPU scheduling in kube-scheduler side has no any different with other scalar resources. The concrete device-level assigning is done by kubelet and GPU device plugin, which will generate container's GPU env. + +Our design has no conflict with the above process. Our device reporter reports Koordinator GPU resources for kubelet +updating node resources. Then we schedule device request in our new plugin with new device resource account. 
In the pre-bind
+stage, we will update container resources with Koordinator GPU resources; this is for kubelet to check resource limits.
+We will also add the device allocation information to the Pod's annotation. On the node side, the k8s device plugin will first patch
+the container env, but we will overwrite these envs in koord-runtime-proxy with the allocation result in the Pod's annotation.
+
+### Upgrade strategy
+
+If using Koordinator GPU Scheduling to schedule GPU Pods in a brand new cluster, simply install the Koordinator components.
+
+However, if you want to upgrade to Koordinator GPU Scheduling in an existing cluster, you need to avoid GPU devices being repeatedly allocated because of switching between different scheduling mechanisms. Pay attention to the order when upgrading:
+1. Install the Koordinator components. In particular, make sure that the koordlets are all started successfully.
+2. Stop the system or platform that creates new GPU Pods.
+3. Stop the scheduler currently responsible for the GPU Pods and ensure that there are no pending GPU Pods in the current cluster.
+4. Wait a few minutes to ensure that each node's koordlet creates and updates the Device CRD.
+5. Modify all components that create GPU Pods to switch the schedulerName of the Pod to koord-scheduler.
+6. Start trying to create a GPU Pod and verify the koord-scheduler GPU Scheduling result.
+7. Restore the system or platform that creates the GPU Pods and the old scheduler.
+
+In the future, Koordinator will provide a webhook to solve the problem of upgrading an existing cluster. The webhook will identify GPU Pods and modify the schedulerName of newly created GPU Pods to koord-scheduler. At the same time, the webhook will take over the Binding operation of GPU Pods. If the Binding is not initiated by koord-scheduler, it will be rejected.
+
+## Unsolved Problems
+
+## Alternatives
+
+1. Users can choose whether to use the k8s device plugin. As mentioned above, we are compatible with both cases.
diff --git a/versioned_docs/version-v1.4/designs/gang-scheduling.md b/versioned_docs/version-v1.4/designs/gang-scheduling.md
new file mode 100644
index 000000000..dbe2762e9
--- /dev/null
+++ b/versioned_docs/version-v1.4/designs/gang-scheduling.md
@@ -0,0 +1,385 @@
+# GangScheduling
+
+## Summary
+This proposal provides a Gang mechanism for the scheduler to control the binding opportunity of pods. Users can declare a minimum resource-collection number;
+the binding is triggered only when the assigned resources reach the given threshold. We provide `Strict` and `NonStrict` modes to
+control the resource-accumulation process through configuration. We also provide a two-level Gang description to better match
+real scenarios, which is different from the community implementation.
+
+## Motivation
+In AI scenarios, lots of jobs need Gang scheduling. The community has lots of related implementations such as `Coscheduling` or `Volcano`,
+and we drew lots of inspiration from them in the design process.
+
+### Compared with competitors
+
+#### Coscheduling
+1. `Coscheduling` implements a new queue-sort interface and other methods to let one Gang's pods get out of the queue in order as much as possible.
+If a pod fails to be scheduled, the requests that have been successfully scheduled in this round of the Gang scheduling cycle will be rolled back,
+and the remaining pods waiting for scheduling will be rejected in the PreFilter check until this scheduling cycle has passed.
+For example, suppose a Gang requires 10 tasks to be scheduled; if the first 5 tasks are allocated and the 6th task fails to be scheduled,
+`Coscheduling` will roll back the first 5 tasks and ignore the remaining 4 tasks in this Gang scheduling cycle. `Coscheduling` simply uses a
+global time interval to control the Gang scheduling cycle. The first defect is that a uniform time interval causes
+problems: if the configured time is too long, it leads to useless waiting; if the configured time is too short,
+it leads to useless scheduling. Secondly, it is very difficult for a large job to meet all resource requests at one time.
+This mechanism leads to a very low probability of gathering full resources, and eventually makes the job starve to death. We call this process `Strict`.
+
+2. Some jobs have complex Gang requirements. For example, a job has several roles. Each role has several pods
+and its own Gang conditions. Jobs also need different roles to form different GangGroups. All pods in a GangGroup can
+trigger the bind process only after all roles in the GangGroup meet their Gang conditions. `Coscheduling` can't meet
+this requirement.
+
+### Goals
+1. Define an API to announce the Gang scheduling configuration.
+
+2. Provide a scheduler plugin to achieve the Gang scheduling ability.
+
+### Non Goals and Future Work
+1. Provide the ability to solve the Gang resource deadlock problem of `NonStrict`.
+
+## Proposal
+
+### Key concept
+
+#### Strict and NonStrict
+
+As mentioned above, in `Strict`, if a pod fails to be scheduled, the pods that have been successfully scheduled in
+this scheduling cycle will be rolled back, and the remaining pods waiting for scheduling will be rejected in the
+PreFilter check until this scheduling cycle has passed. We call this mode `Strict`.
+
+In `NonStrict`, if a pod fails to be scheduled, it has no impact on any other pod. We continue to accumulate
+the allocated pods until the condition of the Gang is met. This process is friendly to Gangs with a large number of pods, but it
+increases the risk of resource deadlock between Gangs. For example, the quota of a quota group is 10 (quota will be proposed later),
+and the user submits three Gangs, each with 5 pods. Due to various plugin constraints, Gang1\2\3 may allocate resources of 3\3\4 respectively.
+Since the quota group's quota is full, there will be no new resource scheduling. We call this the resource deadlock of Gangs.
+In a future proposal, we will try to fix this problem.
+
+#### GangGroup
+As mentioned above, some jobs have complex Gang requirements. For example, a job has several roles. Each role has several pods
+and its own Gang conditions. Jobs also need different roles to form different GangGroups. All pods in a GangGroup can
+trigger the bind process only after all roles in the GangGroup meet their Gang conditions. So we introduce the `GangGroup` concept,
+which allows users to bundle different Gangs together.
+
+#### After Gang
+It should be noted that, if the resource accumulation conditions of the Gang are met, but then some pods fail in the process of binding,
+or some bound pods are preempted or rescheduled, should the constraints of the Gang still be effective in the process of resource reallocation?
+Because the initial purpose of Gang is to require pods to be pulled up at the same time, if some pods have been pulled up,
+then the subsequent Gang behavior is meaningless.
Therefore, when once Gang has been satisfied, all subsequent resource allocations +are no longer constrained by Gang rules, and their performance is similar to ordinary pod. + +As mentioned above, `WaitTime` is the max wait time since first pod comes to permit stage. If `WaitTime` is timeout, +scheduler will roll back all assumed pods, update each pod's annotation with `gang.scheduling.koordinator.sh/timeout=true`, and +won't schedule these pods anymore. User should pay attention to this status and delete pods timely. + +### API +#### Definition + +Our original intention is to improve and enhance the ability of the community's original `PodGroup`, so we will be +compatible with the way the community declares the `PodGroup`. We also provide a lighting way to just use annotations to +use Gang feature. + +#### CRD way +User can use `PodGroup` CRD in community to declare a gang: +```go +type PodGroup struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + Spec PodGroupSpec `json:"spec,omitempty"` + Status PodGroupStatus `json:"status,omitempty"` +} +type PodGroupSpec struct { + MinMember int32 `json:"minMember,omitempty"` + MinResources *v1.ResourceList `json:"minResources,omitempty"` + + ScheduleTimeoutSeconds *int32 `json:"scheduleTimeoutSeconds,omitempty"` +} +``` +Pod should use `pod-group.scheduling.sigs.k8s.io` in label to associate with `PodGroup`. + +Also, we introduce some optional definitions as below: +```yaml +gang.scheduling.koordinator.sh/total-number +gang.scheduling.koordinator.sh/mode +gang.scheduling.koordinator.sh/groups +``` +- `gang.scheduling.koordinator.sh/name` indicates the gang's name, it should be emphasized that the name should be in the form of RFC 1123 + +- `gang.scheduling.koordinator.sh/total-number` helps to calculate Gang scheduling cycle in `strict mode`, you can +find more detail in `Data-Structure` chapter. Default equals to `gang.scheduling.koordinator.sh/min-available`. + +- `gang.scheduling.koordinator.sh/mode` determines `Strict` or `NonStrict`. Default is `Strict`. + +- `gang.scheduling.koordinator.sh/groups` describes GangGroups. Default is empty, which means don't need to form a `GangGroup` with others, +and the gangs in one gangGroup can from different namespaces. + +`gang.scheduling.koordinator.sh/total-number`, `gang.scheduling.koordinator.sh/mode`, `gang.scheduling.koordinator.sh/gang-groups` should be found in +`PodGroup`'s annotation if needed. + +##### Example +When user apply a basic gang, the example is as follows: +```yaml +apiVersion: v1alpha1 +kind: PodGroup +metadata: + creationTimestamp: "2022-07-11T18:26:33Z" + name: gang-a + namespace: default +spec: + minMember: 5 + minResources: + cpu: "5" + memory: "2048Mi" + scheduleTimeoutSeconds: 600 +``` + +Let's assume a job has two roles: A and B, each role has several pods. podA belongs to roleA, podB belongs to roleB. 
+roleA and roleB belongs to one GangGroup, the example is as follows: +```yaml +apiVersion: v1alpha1 +kind: PodGroup +metadata: + creationTimestamp: "2022-07-11T18:26:33Z" + name: gang-a + namespace: namespaceA + annotations: + gang.scheduling.koordinator.sh/total-number: 5 + gang.scheduling.koordinator.sh/mode: Strict + gang.scheduling.koordinator.sh/groups: ["namespaceA/gang-a", "namespaceB/gang-b"] +spec: + minMember: 5 + minResources: + cpu: "5" + memory: "2048Mi" + scheduleTimeoutSeconds: 600 +``` + +It should be noted that, if use Gang feature by `CRD way`, user should let high level operator maintain Gang CRD life circle +like handling `update/create/delete` events. Also, from a Scheduler perspective, scheduler should handle receive-order-issue's +between Gang CRD and pod. For example, if pods arrive to scheduler before Gang CRD, we have to build a fake Gang data structure +temporarily to collect all related pods, and need to suspend the scheduling of pods until parse the configuration from real Gang CRD. + +#### Annotation way +```yaml +gang.scheduling.koordinator.sh/name +gang.scheduling.koordinator.sh/min-available +``` + +The upper definitions are indispensable. We are compatible with `pod-group.scheduling.sigs.k8s.io`, `pod-group.scheduling.sigs.k8s.io/name` +and `pod-group.scheduling.sigs.k8s.io/min-available` in community. We also support new definitions to declare Gang's name and minimum number. + +Also, we introduce some optional definitions as below, most are mentioned above: +```yaml +gang.scheduling.koordinator.sh/waiting-time +gang.scheduling.koordinator.sh/total-number +gang.scheduling.koordinator.sh/mode +gang.scheduling.koordinator.sh/groups +``` + +- `gang.scheduling.koordinator.sh/waiting-time` represents max wait time since first pod comes to permit stage. Default is a global config. + +- `gang.scheduling.koordinator.sh/total-number` helps to calculate Gang scheduling cycle in `strict mode`, you can +find more detail in `Data-Structure` chapter. Default equals to `gang.scheduling.koordinator.sh/min-available`. + +- `gang.scheduling.koordinator.sh/mode` determines `Strict` or `NonStrict`. Default is `Strict`. + +- `gang.scheduling.koordinator.sh/groups` describes GangGroups. Default is empty, which means don't need to form a `GangGroup` with others. + +It should be noted that, the annotation mode's parameter will overwrite CRD's mode if both exist. +And gangGroup should be announced with " gangNamespace" + "/" + "gangName " + +##### Example +When user apply a basic gang, the example is as follows: +```yaml +metadata: + annotations: + gang.scheduling.koordinator.sh/name: gang-a + gang.scheduling.koordinator.sh/min-available: 5 +``` + +Let's assume a job has two roles: A and B, each role has several pods. PodA belongs to roleA, podB belongs to roleB. 
+roleA and roleB belongs to one GangGroup, the example is as follows: +```yaml +metadata: + annotations: + gang.scheduling.koordinator.sh/name: gang-a + gang.scheduling.koordinator.sh/waiting-time: 3600s + gang.scheduling.koordinator.sh/min-available: 5 + gang.scheduling.koordinator.sh/total-number: 5 + gang.scheduling.koordinator.sh/mode: Strict + gang.scheduling.koordinator.sh/groups: ["namespaceA/gang-a", "namespaceB/gang-b"] +metadata: + annotations: + gang.scheduling.koordinator.sh/name: gang-b + gang.scheduling.koordinator.sh/waiting-time: 3600s + gang.scheduling.koordinator.sh/min-available: 5 + gang.scheduling.koordinator.sh/total-number: 5 + gang.scheduling.koordinator.sh/mode: Strict + gang.scheduling.koordinator.sh/groups: ["namespaceA/gang-a", "namespaceB/gang-b"] +``` + +Assuming a job has two roles: A and B, each role has several pods. podA belongs to roleA, podB belongs to roleB. +roleA and roleB belongs to different GangGroup, the example as follows: +```yaml +metadata: + annotations: + gang.scheduling.koordinator.sh/name: gang-a + gang.scheduling.koordinator.sh/waiting-time: 3600s + gang.scheduling.koordinator.sh/min-available: 5 + gang.scheduling.koordinator.sh/total-number: 5 + gang.scheduling.koordinator.sh/mode: Strict + gang.scheduling.koordinator.sh/groups: "" +metadata: + annotations: + gang.scheduling.koordinator.sh/name: gang-b + gang.scheduling.koordinator.sh/waiting-time: 3600s + gang.scheduling.koordinator.sh/min-available: 5 + gang.scheduling.koordinator.sh/total-number: 5 + gang.scheduling.koordinator.sh/mode: Strict + gang.scheduling.koordinator.sh/groups: "" +``` + +### Implementation Details +#### QueueSortPlugin + +We design an independent plugin to implement the `QueueSort` extension point separately, so that we can integrate +queue sort logic of all plugins, and register them at one time. + +In this proposal, we implement the Less function to gather pods belong to same Gang. The specific queuing rule is: + +1. Firstly, compare the priorities of the two pods, the higher priority is at the front of the queue. + +2. Secondly, compare creationTimestamp of two pods, if pod belongs to a Gang, then we compare creationTimestamp of the Gang, +the one created first will be at the front of the queue. + +3. Finally, compare pod's namespace, if pod belongs to a Gang, then we compare Gang name. + +```go +type QueueSortPlugin interface{ + QueueSort(*QueuedPodInfo, *QueuedPodInfo) bool +} +``` + +#### GangSchedulingPlugin +##### Data-Structure +###### Gang +```go +type Gang struct { + Name string + WaitTime time.Duration + Mode string //Strict or NonStrict + GangGroup []string + MinRequiredNumber int + TotalChildrenNum int + Children map[string]*PodInfo + BoundChildren map[string]*PodInfo + WaitingForBindChildren map[string]*PodInfo + ResourceSatisfied bool + ScheduleCycle int + ScheduleCycleValid bool + ChildrenScheduleRoundMap map[string]int +} +``` + +We design the Gang to record Gang status in scheduler memory. We can get the children pods from "Children" field, and the +`BoundChildren, WaitingForBindChildren` store the pods binding status, which is used to check if the pods can pass permit stage. + +Once Permit stage passed, we will set `ResourceSatisfied=true`, as mentioned above in `After Gang` chapter, this variable is +used for judging whether gang has been satisfied. when handle failover case, if any pod in Gang has been bound, we set `ResourceSatisfied=true`. + +We especially explain `scheduleCycle` and `childrenScheduleRoundMap` field. 
These fields control Gang's scheduling cycle. For example, +at the beginning, `scheduleCycle` is 1, and each pod's cycle in `childrenScheduleRoundMap` is 0. When each pod comes to PreFilter, +we will check if the pod's value in `childrenScheduleRoundMap` is smaller than Gang's `scheduleCycle`, If result is positive, +we set the pod's cycle in `childrenScheduleRoundMap` equal with `scheduleCycle` and pass the check. If result is negative, means +the pod has been scheduled in this cycle, so we should reject it. With `totalChildrenNum`'s help, when the last pod comes to make all +`childrenScheduleRoundMap`'s values equal to `scheduleCycle`, Gang's `scheduleCycle` will be added by 1, which means a new schedule cycle. + +We continue to explain `scheduleCycleValid` field, during the scheduling, When a pod failed at Filter stage, we will set ScheduleCycleValid to +false in PostFilter stage, which means any pod in this Gang shouldn't be scheduled until it is set to "true", +and the remaining pods should be rejected in PreFilter stage. Only When `scheduleCycle` added by 1, we will reset the `scheduleCycleValid` to true. + +It should be emphasized that `scheduleCycle\scheduleCycleValid\childrenScheduleRoundMap` only work in `Strict`. + +##### GangPlugin + +this is the framework of the Plugin,we cache the Gang info above in the gangCache. +```go +type GangPlugin struct { + frameworkHandler framework.Handle + gangClient gangClient.Interface + podLister listerv1.PodLister + snapshotSharedLister framework.SharedLister + gangCache map[string]*Gang +} +``` +during the whole kubernetes shceduling process,we only need to realize our logic into four extention points as below: +```go +var( + _ framework.PreFilterPlugin = &GangScheduling{} + _ framework.PostFilterPlugin = &GangScheduling{} + _ framework.PermitPlugin = &GangScheduling{} + _ framework.ReservePlugin = &Coscheduling{} +) +type GangScheduling interface{ + ActiveGang(pod *corev1.Pod, state *framework.CycleState) + PreFilter(context.Context, *corev1.Pod) error + PostFilter(ctx context.Context, state *CycleState, pod *v1.Pod, filteredNodeStatusMap NodeToStatusMap) (*PostFilterResult, *Status) + Permit(context.Context, *corev1.Pod) Status + Unreserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) +} +``` +###### **PreFilter** + +if `NonStrict`, we only do step1 and step2: + +- Check whether childes in Gang has met the requirements of minimum number under each Gang, and reject the pod if negative. + +- Check whether the Gang has been timeout(check the pod's annotation,later introduced at Permit section), and reject the pod if positive. + +- Check whether the Gang has met the `scheduleCycleValid` check, and reject the pod if negative. + +- Try update `scheduleCycle`, `scheduleCycleValid`, `childrenScheduleRoundMap` as mentioned above. + + +###### **PostFilter** + +At this point means the pod didn't pass the Filter Plugin, we should: + +- If `Strict`, we will set `scheduleCycleValid` to false and release all assumed pods. + +- If `NonStrict`, we will do nothing. + +###### **Permit** + +Any pod passes Filter stage will come to this stage. Scheduler will calculate all Gangs in GangGroup whether the current +number of assumed-pods in each Gang meets the Gang's minimum requirement. + +- If Gang don't meet the bind-condition, we will give the pod a "Wait" Status with a timeout duration, and the bind +goroutine will keep waiting until the wait is timeout or passed. 
Then we will run the `ActiveGang` method, which puts all
+the pods belonging to the Gang that are in the `schedulableQueue` or `backoffQueue` back into the `activeQueue`, so that the pods of the Gang
+can be continuously scheduled as much as possible.
+
+It should be noted that, in the community scheduler, the maximum timeout value is limited to 15 minutes; we may need to hook RunPermitPlugins
+to enlarge the timeout when 15 minutes is not enough. For now we record this as a known issue.
+
+- If the Gang meets the bind-condition, we will give every waiting pod a "Success" status, which lets the bind goroutine of
+each pod leave the waiting status and continue to run. Also, as mentioned above, we will set the Gang's `ResourceSatisfied` to true.
+
+###### **Un-reserve**
+
+Both a timeout at the permit stage and a binding failure lead the pod to the un-reserve stage. We can distinguish the two cases from the Gang's "ResourceSatisfied" field:
+if the field is true, it means binding failed; otherwise, the Gang timed out.
+
+- When the permit stage times out, we will add an annotation like `gang.scheduling.koordinator.sh/timeout=true` to all the pods
+belonging to the Gang and will release the resources of all the assumed pods. The Gang will not be scheduled anymore;
+the user should manually handle the timeout event.
+
+- When binding fails, as mentioned above, the collection of the Gang's resources is over, and we will do nothing except roll back
+the failed pod's resources.
+
+###### **Init**
+
+We will register a pod event handler to watch pod events for updating Gangs.
+
+## Unsolved Problems
+
+## Alternatives
+Users can choose to use Gang in `Strict` or `NonStrict` mode case by case.
diff --git a/versioned_docs/version-v1.4/designs/koordinator-yarn.md b/versioned_docs/version-v1.4/designs/koordinator-yarn.md
new file mode 100644
index 000000000..4724e8e3b
--- /dev/null
+++ b/versioned_docs/version-v1.4/designs/koordinator-yarn.md
@@ -0,0 +1,76 @@
+# Koordinator YARN Copilot
+
+## Introduction
+Koordinator has supported hybrid orchestration of workloads on Kubernetes, so that batch jobs can use the requested but
+unused resources as koord-batch priority and BE QoS class to improve cluster utilization. However, there are still lots
+of applications running outside K8s, such as [Apache Hadoop YARN](https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/YARN.html).
+As a resource management platform in the BigData ecosystem, YARN supports a number of computing engines including
+MapReduce, Spark, Flink, Presto, etc. Although some computing engines provide K8s operators that can submit jobs
+into K8s, the Hadoop YARN ecosystem is still active, which can be seen from the fact that most cloud providers are still
+selling commercial products like [E-MapReduce](https://www.aliyun.com/product/bigdata/emapreduce).
+
+In order to extend the co-location scenario of Koordinator, the community, together with developers from Alibaba Cloud,
+Xiaohongshu, and Ant Financial, set up this project for running Hadoop YARN jobs with koord-batch resources alongside other K8s
+pods, which can improve cluster resource utilization by providing `batch` resources to Hadoop YARN. This project has
+been widely used in Xiaohongshu's production environment.
+
+## Technical Details
+
+### Principles
+- Keep YARN as the portal of job submission.
+- Based on the open source version of Hadoop YARN, with no intrusive modifications to YARN.
+- The co-location resources provided by Koordinator can be used by both K8s Pods and YARN tasks, which means different types of applications can run on the same node.
+- QoS policies of Koordlet should be compatible with YARN tasks.
+
+![image](/img/hadoop-k8s.svg)
+
+### Resource Allocation
+In Koordinator, batch resources of nodes are dynamically calculated by koord-manager based on the node resource load and
+updated as K8s extended resources on the Node. The `koord-yarn-operator` component synchronizes the batch resources to
+the YARN RM, so that YARN tasks can request these batch resources. Since the K8s scheduler and the YARN scheduler share the
+amount of allocatable batch resources, each scheduler's allocation information must be known by the other.
+
+1. `koord-manager` calculates the original batch total `origin_batch_total`, and records it as a K8s node annotation.
+2. `koord-yarn-operator` collects the amount of resources that YARN nodes have allocated from the YARN RM, `yarn_requested`, and records it as a K8s node annotation.
+3. Before `koord-manager` updates the total batch resources of K8s, the resources that have been allocated by YARN must be excluded: `k8s_batch_total = origin_batch_total – yarn_requested`.
+4. Before `koord-yarn-operator` updates resources to the YARN RM, the amount of resources that K8s has allocated must likewise be excluded: `yarn_batch_total = origin_batch_total – k8s_batch_requested` (a small sketch of this arithmetic appears at the end of the Technical Details section).
+
+![image](/img/koord-yarn-operator.svg)
+
+Since multiple schedulers work in the cluster, `batch` priority resources may be overcommitted due to the
+sequence of resource synchronization. `koordlet` performs arbitration for the allocated resources on the node side.
+However, unlike the arbitration of `kubelet`, `koordlet` uses the QoS policies as the arbitration method, with
+the goals of "avoiding interference" and "ensuring the resource quality of batch priority", rejecting or evicting pods
+according to the realtime status of resource usage.
+
+### Node Runtime
+The Node Manager works on the node side of a YARN cluster and is responsible for the lifecycle management of tasks.
+Under the K8s co-location scenario, the NM is deployed as a DaemonSet. The resource management of the NM and of YARN tasks is
+separated into different cgroups for the purpose of fine-grained control, so that the NM only needs to request resources
+according to its own consumption.
+
+![image](/img/node-manager-runtime.svg)
+
+Koordinator requires the YARN NM to enable LinuxContainerExecutor and to specify the cgroup path under the best-effort hierarchy,
+because `kubelet` and `koordlet` use cgroups for QoS management, so that all YARN tasks can also be managed like other K8s Pods.
+
+### QoS Strategies
+Currently, `koordlet` supports a series of QoS policies, which also need to be adapted for YARN tasks. For resource
+isolation parameters, such as Group Identity, Memory QoS, L3 Cache isolation, etc., `koordlet` adapts them
+according to the cgroup hierarchy. For dynamic strategies such as eviction and suppression, `koordlet` adds a new
+module, `yarn-copilot-agent`, which adapts the operations on YARN tasks, including meta-information collection,
+metrics collection, task eviction, etc.
+
+All QoS policies are still managed in `koordlet`, and the relevant modules in `koordlet` communicate with
+`yarn-copilot-agent`. Also, the API design of `yarn-copilot-agent` keeps scalability in mind and can be used for connecting
+other resource frameworks in the future.
+
+![image](/img/yarn-copilot-agent.svg)
+
+`koordlet` will support all QoS policies for YARN scenarios in subsequent versions.
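+
+As a recap of the arithmetic in the Resource Allocation section above, the following is a minimal sketch assuming plain integer resource units; the function and variable names are illustrative only:
+
+```go
+package main
+
+import "fmt"
+
+// batchTotals recomputes the batch totals that each side may advertise, following
+// the two formulas above: the K8s side excludes what YARN has already allocated,
+// and the YARN side excludes what K8s has already allocated.
+func batchTotals(originBatchTotal, yarnRequested, k8sBatchRequested int64) (k8sBatchTotal, yarnBatchTotal int64) {
+    k8sBatchTotal = originBatchTotal - yarnRequested
+    yarnBatchTotal = originBatchTotal - k8sBatchRequested
+    return
+}
+
+func main() {
+    // Assume 100 units of original batch resource, 30 already requested by YARN
+    // and 20 already requested by K8s batch pods (illustrative numbers only).
+    k8sTotal, yarnTotal := batchTotals(100, 30, 20)
+    fmt.Println(k8sTotal, yarnTotal) // 70 80
+}
+```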
+
+## Join US
+Koordinator has released some features for K8s and YARN co-location in the latest versions of each component, and the community is
+still working on the iteration of other features in the following milestones. If you have any questions or want to participate
+in co-construction, you are welcome to submit an [issue](https://github.com/koordinator-sh/yarn-copilot/issues) or
+comment in the [discussion](https://github.com/koordinator-sh/koordinator/discussions/1297).
diff --git a/versioned_docs/version-v1.4/designs/koordlet-overview.md b/versioned_docs/version-v1.4/designs/koordlet-overview.md
new file mode 100644
index 000000000..2ea0cbdfc
--- /dev/null
+++ b/versioned_docs/version-v1.4/designs/koordlet-overview.md
@@ -0,0 +1,56 @@
+# Koordlet
+
+
+## Summary
+Koordlet is a DaemonSet deployed on Kubernetes nodes, which is used for co-location resource overcommitment, interference
+detection, QoS guarantee, etc. It is composed of several modules which are independently responsible for information collection,
+data profiling and QoS management. Some modules also provide a framework scaffold, which provides a set
+of plugins for extension (such as the "QoS Manager"), so that new strategies can be easily added.
+
+## Architecture
+![image](/img/koordlet-arch.svg)
+
+## Modules
+
+### Metrics Advisor
+Metrics Advisor provides the basic information of resource usage and performance characteristics of nodes, pods and containers.
+It is an independent module that collects, processes and exports resource profiles periodically. It also detects the
+interference of running containers such as CPU scheduling, memory allocation latency and Pressure Stall Information (PSI).
+The information is widely used by resource overcommitment and QoS guarantee plugins.
+
+### Storage
+Storage manages the information from Metrics Advisor and States Informer, providing APIs for CRUD, and garbage-collects outdated data
+periodically. There are two types of data: `static` and `time-series`. The time-series type keeps historical data for
+statistics purposes, such as CPU and memory usage. The static type includes the status information of node, pod and container,
+such as the CPU info of the node and the metadata of the pod.
+
+### States Informer
+States Informer syncs node and pod status from kube-apiserver and kubelet, and saves the data into Storage as the `static` type.
+This module should remain relatively stable over development iterations compared with others.
+
+### QoS Manager
+QoS Manager coordinates a set of plugins which are responsible for SLO guarantee by priority, mitigating interference
+among pods. Plugins dynamically tune the "knobs" of resource parameters in different scenarios, according to resource
+profiling, interference detection results and the SLO configuration. Each plugin produces execution plans for
+"knobs" tuning. QoS Manager also acts as an arbitrator among multiple execution plans, consolidating duplicates and
+resolving conflicts.
+
+QoS Manager could be the most frequently iterated module, with new plugins extended, strategy algorithms updated and
+policy execution methods added. A new plugin should implement the interface which contains a series of standard APIs, so
+that the "core" can be kept simple and maintainable. Advanced plugins such as those for interference detection
+will get more complex as time goes by, and might become independent modules after their incubation in QoS Manager has
+stabilized.
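+
+To illustrate the plugin-style extension described above, here is a purely hypothetical sketch of what such a standard plugin interface could look like; the interface and type names are illustrative and are not the actual koordlet API:
+
+```go
+package qosmanager
+
+import "context"
+
+// ExecutionPlan is a hypothetical description of the "knobs" a plugin wants to tune,
+// e.g. cgroup parameters keyed by their target path.
+type ExecutionPlan struct {
+    Updates map[string]string
+}
+
+// Plugin is an illustrative interface for a QoS Manager strategy plugin: it produces
+// an execution plan from the latest profiling data, and the manager arbitrates
+// between plans from different plugins before applying them.
+type Plugin interface {
+    Name() string
+    GeneratePlan(ctx context.Context) (*ExecutionPlan, error)
+}
+```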
+ +### Metrics Reporter +Metrics Reporter reads historical metric and state data from Storage, then merges and sends them to apiserver, +which will be consumed by Koordinator Manager for resource overcommitment model management. Metrics Reporter also +supports multiple processing algorithms for different co-location scenarios. + +### Runtime Hooks +Runtime Hooks act as the back-end server of Runtime Hook Manager. Runtime Hook Manager is a CRI Proxy, which +intercepting the CRI request, calling back-end server to inject policies, such as setting resource isolation +parameters by pod priorities, applying resource allocation policies. Runtime Hooks provide a framework to maintain +different kinds of policies, and provides flexible extension points during the lifecycle of containers. + +#### e.g. LLC Isolation Injections during Pod Lifecycle +![image](/img/llc-isolation.svg) diff --git a/versioned_docs/version-v1.4/designs/load-aware-scheduling.md b/versioned_docs/version-v1.4/designs/load-aware-scheduling.md new file mode 100644 index 000000000..3ca5bdc56 --- /dev/null +++ b/versioned_docs/version-v1.4/designs/load-aware-scheduling.md @@ -0,0 +1,115 @@ +# Load-aware Scheduling + +## Summary + +Although Koordinator provides the co-location mechanism to improve the resource utilization of the cluster and reduce costs, it does not yet have the ability to control the utilization level of the cluster dimension. This proposal defines a scheduling plugin to help Koordinator achieve this capability. + +## Motivation + +Koordinator oversells some resources through the co-location mechanism. Although it can improve the utilization of nodes, Best Effort workloads may also interfere with latency-sensitive applications. + +### Goals + +1. Provides a configurable scheduling plugin to help control cluster resource utilization. +2. Utilization control mechanism that supports multiple resources. +3. Control resource utilization at a safe threshold. + +### Non-Goals/Future Work + +1. Help the plugin to achieve more reasonable estimates and better results through application profiles. This is left as a follow-up work that will be done under a different proposal. + +## User stories + +### Story 1 + +When the resource utilization of the node has reached a high threshold, serious resource contention will occur between the running workloads on the node. For example, best effort workloads are frequently suppressed due to higher-priority applications requiring resources. As a result, best effort workloads are timed out or even forced to end; or a latency-sensitive application will suffer severe performance degradation under high utilization, failing to meet external SLAs. This should be avoided. + +### Story 2 + +Workloads in a co-located cluster have different resource requirements. Typical CPU-bound workloads expect to use more CPU, while other types of workloads may use more memory. It is possible that the utilization of CPU resources is relatively high, while the utilization of memory resources is relatively low. In this scenario, the unbalanced utilization of resources will affect the effect of scheduling, and may even lead to the problem that resources are idle but Pods cannot be scheduled. + +### Story 3 + +Koordinator defines NodeMetric CRD to describe the resource usage of nodes and is regularly updated by koordlet. 
However, if there are many Pods scheduled to cold nodes (that is, nodes with low resource utilization) during the update cycle, when these Pods start running, the resource utilization of these nodes may exceed the expected threshold. As a result, the runtime quality of these pods is not as good as expected. + +### Story 4 + +The koordlet may not be able to report the latest resource usage due to node exception. Such nodes should be avoided during scheduling to prevent unexpected exceptions. + +## Implementation Details + +![image](/img/load-aware-scheduling-arch.svg) + +The scheduling plugin filters abnormal nodes and scores them according to resource usage. This scheduling plugin extends the Filter/Score/Reserve/Unreserve extension points defined in the Kubernetes scheduling framework. + +### Filter unhealthy nodes + +By default, abnormal nodes are filtered, and users can decide whether to enable or not by configuring as needed. + +- Filter nodes where koordlet fails to update NodeMetric. If the configuration enables, the plugin will exclude nodes with *nodeMetrics.status.updateTime >= LoadAwareSchedulingArgs.nodeMetricExpirationSeconds*. + +- Filter nodes by utilization thresholds. If the configuration enables, the plugin will exclude nodes with *latestUsageUtilization >= utilizationThreshold*. In the filtering phase, only the resource utilization is obtained from the latest NodeMetric, and the resource usage of the allocated but not yet counted Pods does not participate in the calculation, so as to allocate resources to the newly created Pods and avoid scheduling failure due to unreasonable estimates. + +### Score algorithm + +The core logic of the scoring algorithm is to select the node with the smallest resource usage. However, considering the delay of resource usage reporting and the delay of Pod startup time, the resource requests of the Pods that have been scheduled and the Pods currently being scheduled within the time window will also be estimated, and the estimated values will be involved in the calculation. + +### Plugin configuration + +```go + +type LoadAwareSchedulingArgs struct { + metav1.TypeMeta + + FilterExpiredNodeMetrics *bool `json:"filterExpiredNodeMetrics,omitempty"` + NodeMetricExpirationSeconds *int64 `json:"nodeMetricExpirationSeconds,omitempty"` + ResourceWeights map[corev1.ResourceName]int64 `json:"resourceWeights,omitempty"` + UsageThresholds map[corev1.ResourceName]int64 `json:"usageThresholds,omitempty"` + EstimatedScalingFactors map[corev1.ResourceName]int64 `json:"estimatedScalingFactors,omitempty"` +} + +``` + +- `FilterExpiredNodeMetrics` indicates whether to filter nodes where koordlet fails to update NodeMetric. +- `NodeMetricExpirationSeconds` indicates the NodeMetric expiration in seconds. When NodeMetrics expired, the node is considered abnormal.Default is 180 seconds. +- `ResourceWeights` indicates the weights of resources. The weights of CPU and Memory are both 1 by default. +- `UsageThresholds` indicates the resource utilization threshold, the default for CPU is 65%, and the default for memory is 95%. +- `EstimatedScalingFactors` indicates the factor when estimating resource usage. The default value of CPU is 85%, and the default value of Memory is 70%. + +`FilterExpiredNodeMetrics` controls the filter behavior, if it is false, `NodeMetricExpirationSeconds` can still be used when scoring. + +### Custom NodeMetric update Period + +This plugin is dependent on NodeMetric's reporting period. 
Different reporting periods need to be set according to different scenarios and workloads. If the reporting period is relatively long, koordlet needs to aggregate within the reporting period to ensure the effect of the metrics. Therefore, NodeMetricSpec needs to be extended to support user-defined reporting period and aggregation period. Users can modify `slo-controller-config` to complete the corresponding configuration, and the controller in `koord-manager` will be responsible for updating the reporting period and aggregation period fields of NodeMetrics of related nodes. + +```go +// NodeMetricSpec defines the desired state of NodeMetric +type NodeMetricSpec struct { + // CollectPolicy defines the Metric collection policy + CollectPolicy *NodeMetricCollectPolicy `json:"metricCollectPolicy,omitempty"` +} + +// NodeMetricCollectPolicy defines the Metric collection policy +type NodeMetricCollectPolicy struct { + // AggregateDurationSeconds represents the aggregation period in seconds + AggregateDurationSeconds *int64 `json:"aggregateDurationSeconds,omitempty"` + // ReportIntervalSeconds represents the report period in seconds + ReportIntervalSeconds *int64 `json:"reportIntervalSeconds,omitempty"` +} +``` + +### Custom node usage thresholds + +Currently, the resource utilization thresholds of nodes are configured based on experience to ensure the runtime quality of nodes. But there are also ways to evaluate the workload running on the node to arrive at a more appropriate threshold for resource utilization. For example, in a time-sharing scenario, a higher threshold can be set to allow scheduling to run more best effort workloads during the valley of latency-sensitive applications. When the peak of latency-sensitive applications comes up, lower the threshold and evict some best effort workloads. In addition, 3-sigma can be used to analyze the utilization level in the cluster to obtain a more appropriate threshold. + +Define Annotation supports user-defined node resource utilization thresholds. + +```go +const ( + AnnotationCustomUsageThresholds = "scheduling.koordinator.sh/usage-thresholds" +) + +type CustomUsageThresholds struct { + UsageThresholds map[corev1.ResourceName]int64 `json:"usageThresholds,omitempty"` +} +``` \ No newline at end of file diff --git a/versioned_docs/version-v1.4/designs/multi-hierarchy-elastic-quota-management.md b/versioned_docs/version-v1.4/designs/multi-hierarchy-elastic-quota-management.md new file mode 100644 index 000000000..6c8cebc88 --- /dev/null +++ b/versioned_docs/version-v1.4/designs/multi-hierarchy-elastic-quota-management.md @@ -0,0 +1,342 @@ +# Multi Hierarchy Elastic Quota Management + +## Summary +When several users or teams share a cluster, fairness of resource allocation is very important. This proposal provides +multi-hierarchy elastic quota management mechanism for the scheduler. +- It supports configuring quota groups in a tree structure, which is similar to the organizational structure of most companies. +- It supports the borrowing / returning of resources between different quota groups, for better resource utilization efficiency. +The busy quota groups can automatically temporarily borrow the resources from the idle quota groups, which can improve the +utilization of the cluster. At the same time, when the idle quota group turn into the busy quota group, it can also automatically +take back the "lent-to" resources. +- It considers the resource fairness between different quota groups. 
When the busy quota groups borrow the +resources from the idle quota groups, the resources can be allocated to the busy quota groups under some fair rules. + +## Motivation + +### Compared with competitors + +#### Resource Quotas +[Resource Quotas](https://kubernetes.io/docs/concepts/policy/resource-quotas/) provides the ability to restrain the upper +limit of resource usage in one quota group. The quota group resource usage aggregated based on the pod resource configurations. +Suppose there are still free resources in the cluster, but the resource usage of this quota group is close to the limit. +The quota group cannot flexibly borrow the idle resources from the cluster. The only possible way is to manually adjust the +limit of the quota group, but it is difficult to determine the timing and value of the adjustment when there are lots of +quota groups. + +#### Elastic Quota +[Elastic Quota](https://github.com/kubernetes-sigs/scheduler-plugins/blob/master/kep/9-capacity-scheduling/README.md#goals) +proposed concepts of "max" and "min". "Max" is the upper bound of the resource consumption of the consumers. "Min" is the minimum +resources that are guaranteed to ensure the functionality/performance of the consumers. This mechanism allows the workloads +from one quota group to "borrow" unused reserved "min" resources from other quota groups. The unused "min" of one quota group +can be used by other quota groups, under the condition that there is a mechanism to guarantee the "victim" quota group can +consume its "min" resource whenever it needs. + +If multiple quota groups need borrow unused reserved "min" resources from other quota groups at the same time, +the implementation strategy is FIFO, which means that one quota group may occupy all "borrowed-from "resources, +while other quota groups cannot borrow any resources at all from the cluster. + +Neither of the above support multi hierarchy quota management. + +### Goals +1. Define API to announce multi hierarchy quota configuration. + +2. Provides a scheduler plugin to achieve multi hierarchy quota management ability. + +### Non-goals/Future work +Users have two ways to manage GPU quotas. One is to only declare the number of GPU cards in the quota group, but do not +care about the specific card type assigned. The other is to specify the quotas required by different card types. For example, +suppose user A\B both has 10 GPU quota, and cluster has two GPU types A100\V100. quotaA only declare 10 GPU quota, so in the +scheduling process, as long as the total number of GPU cards allocated to A is 10, no matter what the allocation ratio of +a100\v100 is, it will meet the expectation. QuotaB also declare 10 GPU quota, but has more details with V100 is 5 and A100 is 5, +so the maximum allocation of V100 is 5 and A100 is 5 in the scheduling will meet the expectation. + +We know that the GPU card type reflected by the label or annotation on the node, not in the resource dimension, so we can't +simply configure nvidia.com/gpu-v100, nvidia.com/gpu-a100 directly into the quota group's resource dimension. + +What's more complicated is that in a cluster, there will be multiple quota groups like A\B at the same time, +These two modes will conflict. Suppose that the cluster resource has 20 cards, including 10 cards for A100 and 10 cards for V100. +If the scheduler first assigns 10 cards to quota groupA with all V100, then quota group B's V100 resource has no way to be guaranteed, +which obviously does not meet expectations. 
Therefore, we need to solve the problem that if the above two modes coexist, +the quota mechanism can still work normally. + +The above problems will be solved in the next proposal. + +## Proposal + +### Key Concept\User Stories +1. Each quota group declares its own "min" and "max". The semantics of "min" is the quota group's guaranteed resources, +if quota group's "request" less than or equal to "min", the quota group can obtain equivalent resources to the "request". +The semantics of "max" is the quota group's upper limit of resources. We require "min" to be less than or equal to max. + +2. We define "request" as the sum pod's request in the quota group. When some quota groups "request" is less than "min", and some +quota groups "request" is more than "min", the unused resources of the former can be lent to (or you can choose not to share) the +latter. The latter should use these resources according to the fair rule. When the former needs to use the "lent-to" resources, +the latter should also return the "borrowed-from" resources according to the fair rule. + +3. We define the "runtime" as the current actual resource that can be used by the quota group. For a quota group whose "request" +is less than min, the value of "runtime" is equal to "request". That is to say "request" should be unconditionally satisfied +if the "request" is less than "min". For a quota group whose "request" is greater than "min", the value of "runtime" is between +"min" and "max", and the part exceeding "min" is based on its own "request", the "lent-to" resources, and the ability of +other quota groups to compete for "lent-to" resources. This will be described in detail below. + +4. Hierarchy is very important in a resource-shared cluster. Suppose that the cluster shared by multiple departments, and +each department has multiple teams. If each team is a quota group, we naturally hope that the relationship between departments +and teams is tree shaped. In this way, no matter how to add, delete or adjust quota groups within the department, it is an +internal matter of the department. The cluster administrator only needs to be responsible for the quota configuration at the +level of departments, and the quota group's configuration can delegate power to the department itself. Moreover, tree can +help us easily see the summary of resources from the perspective of departments when there are lots of teams in one department. + +Another advantage of tree structure is that we can control the scope of the "lent-to" resource. For example, a department only +wants to its quota groups can borrow resources from each other, while the resources of the department do not want to be lent +to other departments. This is very convenient for the tree structure. It should be pointed out that although two levels can +meet most scenarios (the more levels, the higher the maintenance complexity), we will support that the height of the quota-tree +is arbitrary. + +### Implementation Details + +#### Calculate RuntimeQuota + +We use an example to introduce how to calculate "runtime". Suppose the cluster total resource is 100, and has 4 quotas, +the configuration and "request" of each quota group described as below: + +![image](/img/runtimequota1.jpg) + +We first calculate the "min" part of "runtime". It should be like as below: + +![image](/img/runtimequota2.jpg) + +Then we find quota groupA can lent 5 quotas to B\C\D, and the cluster has 40 quotas to allocate, so the sum is 45 for B\C\D +to share. 
We introduce a new field to represent allocation fairness, called "shared-weight". The "shared-weight" determines the ability of a quota group to compete for shared resources. That is to say, B/C/D allocate the shared resources in the cluster according to their "shared-weight".
+
+For example, assuming that the weights of B\C\D are 60\50\80:
+
+- B can get 45 * 60 / (60 + 50 + 80) = 14
+
+- C can get 45 * 50 / (60 + 50 + 80) = 12
+
+- D can get 45 * 80 / (60 + 50 + 80) = 19
+
+However, quota group B only needs 5 more (its "request" is 20 and its "min" is 15), while quota groups C and D are still hungry, so quota group B can share 14 - 5 = 9 with C and D.
+
+![image](/img/runtimequota3.jpg)
+
+Quota groups C and D then share the remaining quota of 9 in proportion: C gets 9 * 50 / (50 + 80) = 3 and D gets 9 * 80 / (50 + 80) = 6, and we finally obtain the "runtime" of each quota group.
+
+![image](/img/runtimequota4.jpg)
+
+The whole process can be summarized as follows:
+
+1. The quota groups are divided into two categories: those whose "request" is less than "min", which we call "lent-to-quotas", and those whose "request" is greater than "min", which we call "borrowed-quotas".
+
+2. Calculate the "runtime" of each quota group without exceeding its "min", which tells us how many resources can be lent to the "borrowed-quotas".
+
+3. The "borrowed-quotas" share the lendable resources in proportion to their weights.
+
+4. If a group's new "runtime" is larger than its "request", the surplus becomes new resources that can be lent to the remaining "borrowed-quotas".
+
+It is very difficult to manage the weights of thousands of quota groups in a company. Therefore, we need to set a default value for the "shared-weight". According to our experience in online operations, using "max" as the default "shared-weight" of a quota group satisfies most scenarios. In this way, "max" carries both the meaning of a resource ceiling and of an allocation proportion: the larger the "max", the more resources the group wants. For individual special scenarios, the resource administrator can adjust the weight.
+
+It must be pointed out that if the cluster resources suddenly decrease due to node failures, the sum of "min" may become greater than the total resources of the cluster. If this happens, we cannot actually guarantee the "min" of each quota group. In that case we reduce the "min" of each quota group in a moderate proportion, to ensure that the sum of the "min" values actually in effect is less than the total resources of the cluster.
+
+We need to introduce the concept of the "sys-group". The "sys-group" is a quota group whose "min" is infinite, and its requests are never bound by the quota. It is usually used for system-level pods. When the scheduler starts, the "sys-group" is created by default in scheduler memory, and the scheduler also tries to create the corresponding quota group CRD. Its "min" and "max" are INT_MAX. At the same time, its "min" is not reduced in proportion by the process described above. The real available total resource for normal quota groups is the cluster total resource minus the "used" of the "sys-group".
+
+We also need to introduce the concept of the "default-group". If a pod cannot find a matching quota group, it is matched to the "default-group". The "default-group" is created by default in scheduler memory, and the scheduler also tries to create the corresponding quota group CRD. Its "min" and "max" have default values, and users can modify them on demand.
+
+#### Hierarchy
+We can organize quota groups using a quota-tree, where each quota group has its own configuration.
Currently, we only allow leaf +nodes to submit jobs. An example is as below: + +![image](/img/quotatree1.jpg) + +When we calculate the "request" of each quota group. We first count the requests of each parent group from the bottom up, +which is the accumulation of mathematical min(child group request, child group max). + +![image](/img/quotatree2.jpg) + +Then we calculate the "runtime" from top to bottom. The "runtime" of the parent quota group is the total resources of the +child quota groups. First we calculate parent quota group's "runtime". + +![image](/img/quotatree3.jpg) + +Then we calculate child quota group's "runtime". + +![image](/img/quotatree4.jpg) + +#### Min Guarantee and Preemption +Considering the following situations, suppose that the cluster has two quotas group A\B. At t0 time, only quota groupA has job +submission, it can borrow from quota group B's resource, and the "request" and "used" of quota group are both 100 as below: + +![image](/img/quotaguarantee1.jpg) + +At t1 time, quota groupB has job submission too, so the "runtime" of quota group A\B is both 50. However, if quota +groupA don't return resource back, quota groupB can't assign any resource cause node resource occupied by the quota groupA. + +![image](/img/quotaguarantee2.jpg) + +The solution is that we will monitor the relationship between "used" and "runtime" of each quota group in the background thread. +If quota group's "used" continues to be greater than "runtime", we will start the forced recycling mechanism to kill +several pods in the order of priority from low to high until the "used" is less than or equal to "runtime". If some pods +in the quota group do not want to be recycled, we require such pods can only use resource up to "min". By default, we +assume all pods can use resource beyond "min" if "runtime" larger than "min". + +We do not adopt the cross quota preemption method to solve the problem that when quota group "used" is less than "runtime" +(to preempt the quota group whose "used" is greater than the "runtime"). Due to each quota group has an accurate runtime, +we can accurately recycle the overused resources of each quota group. This is more direct than preemption. + +In addition, we do not think that cross quota preemption is worth recommending. In principle, the priorities of different +quota groups are not comparable, because they may come from different business lines. The high priority of this business line +is not more important than the low priority of other business lines. Only priorities within a quota group have comparative +significance. So we will not support cross quota preemption temporary. Moreover, in inner quota preemption, we will limit +existUsed - preempted + preempt smaller than runtime. + +It can be seen from the above, if "min" of the quota group is not equal to "max", the "runtime" part exceeding "min" may +recycled by the scheduler. + +#### Configuration Limit +We introduce several constraints to ensure that the quota mechanism works properly. + +1. Except for the first level quota group, we require that the sum of "min" of all sub quota groups should be less than or +equal to the "min" of parent group. The reason for excluding the first level quota group is that the cluster resources +cannot avoid jitter. If the cluster resource reduced, we don't want to hinder the update of the quota groups. + +2. The "max" of child quota group can be larger than the "max" of parent group. 
Consider the following scenario, there are +2 subtrees in the cluster, "dev-parent" and "production-parent". Each subtree has several "quota-groups". When "production" +is busy, we can limit the resource use of the "dev" by only decreasing the "max" of "dev-parent", instead of decreasing +the "max" of each sub quota group of "dev-parent". + +3. Parent group cannot run pod. We did receive a request to allow the parent group to submit jobs. The priority of the +parent group's self jobs is higher than that of all the sub-groups, which means that the parent group's self jobs can +preempt the "runtime" of the sub-group's jobs at any time. This is somewhat similar to the hierarchical relationship of +"Town City province". Due to complexity,we do not support this issue for now. + +4. The parent of node can only be parent group, not child group. + +5. A quota group can't be converted on the attribute of parent group\child group. + +6. We allow a node on the quota tree to freely change its parent node, as long as it does not break the existing detection rules. + +We will introduce a new "web-hook" to check the configuration limitation. + +#### Extension Point + +##### PreFilter +We will check if the (Pod.request + Quota.Used) is less than Quota.Runtime. If not, the scheduling cycle of Pod will fail. + +##### PostFilter +We will re-implement the method selectVictimsOnNode in defaultPreempt. The original selectVictimsOnNode method selects all +the pods with the lower priority than the preemptor’s priority as potential victims in a node. For now, we only allow +inner-quota-group preemption. + +##### Cache and Controller +1. We will watch the event of quota group and pod to calculate "runtime" of each quota group. +2. We will create a thread to update quota group crd to display "request\used\runtime" periodicity. +3. We will create a thread to monitor "used" and "runtime" of each quota group. If quota group's "used" continues to be +greater than "runtime", we will start the forced recycling mechanism to kill several pods in the order of priority from +low to high until the "used" is less than or equal to "runtime". + +### API + +#### Quota +We will reuse [Elastic Quota](https://github.com/kubernetes-sigs/scheduler-plugins/blob/master/kep/9-capacity-scheduling/README.md#goals) +'s crd to declare quota group. + +```go +type ElasticQuota struct { + metav1.TypeMeta + metav1.ObjectMeta + Spec ElasticQuotaSpec + Status ElasticQuotaStatus +} + +type ElasticQuotaSpec struct { + Min v1.ResourceList + Max v1.ResourceList +} + +type ElasticQuotaStatus struct { + Used v1.ResourceList +} +``` + +we will also add new annotation and labels to achieve our desired functionality. +```yaml +annotations: + quota.scheduling.koordinator.sh/runtime: {cpu:4, memory: 8Gi} + quota.scheduling.koordinator.sh/shared-weight: {cpu:4, memory: 8Gi} +labels: + quota.scheduling.koordinator.sh/is-parent: false + quota.scheduling.koordinator.sh/parent-quota-name: "parent" + quota.scheduling.koordinator.sh/allow-lent-resource: true +``` +- `quota.scheduling.koordinator.sh/runtime` is updated by the scheduler. It reflects the "runtime" of the quota group. +- `quota.scheduling.koordinator.sh/is-parent` is disposed by the user. It reflects the "child\parent" attribute of the quota group. Default is child. +- `quota.scheduling.koordinator.sh/parent-quota-name` is disposed by the user. It reflects the parent quota name. Default is root. +- `quota.scheduling.koordinator.sh/shared-weight` is disposed by the user. 
It reflects the ability to share the "lent to" resource. Default equals to "max". +- `quota.scheduling.koordinator.sh/allow-lent-resource` is disposed by the user. It reflects whether quota group allows lent unused "min" to others. + +Here is a example: +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: test + namespace: test + annotations: + quota.scheduling.koordinator.sh/runtime: {cpu:4, memory: 8Gi} + quota.scheduling.koordinator.sh/shared-weight: {cpu:4, memory: 8Gi} + labels: + quota.scheduling.koordinator.sh/is-parent: false + quota.scheduling.koordinator.sh/parent-quota-name: "parent" + quota.scheduling.koordinator.sh/allow-lent-resource: true +spec: + max: + cpu: 20 + memory: 40Gi + nvidia.com/gpu: 2 + min: + cpu: 10 + memory: 20Gi + nvidia.com/gpu: 1 +``` + +#### Pod +We introduce a new label on the pod to associate pod with quota group: +```yaml +labels: + quota.scheduling.koordinator.sh/quota-name: "test1" +``` + +if pod's don't have the label, we will follow [Elastic Quota](https://github.com/kubernetes-sigs/scheduler-plugins/blob/master/kep/9-capacity-scheduling/README.md#goals) +using namespace to associate pod with quota group. + +### Compatibility +We are fully compatible with [Elastic Quota](https://github.com/kubernetes-sigs/scheduler-plugins/blob/master/kep/9-capacity-scheduling/README.md#goals) 's interface. +If pod's don't have the "quota-name" label, we will use the namespace to associate pod with quota group. If the pod has +the "quota-name" label, we will use it to associate pod with quota group instead of namespace. If we can't find the +matched quota group, we force the pod to associate with the "default-group". + +## Unsolved Problems +Please see Non-goals/Future work. + +## Alternatives + +## Implementation History + +## References diff --git a/versioned_docs/version-v1.4/designs/node-prediction.md b/versioned_docs/version-v1.4/designs/node-prediction.md new file mode 100644 index 000000000..9bda2cc8a --- /dev/null +++ b/versioned_docs/version-v1.4/designs/node-prediction.md @@ -0,0 +1,278 @@ +# Node Prediction + +## Summary + +The *node prediction* is proposed to both improve the node utilization and avoid overloading. By profiling the +tendency of the node metrics, we can estimate the peak usage and implement more efficient over-commitment policy. + +## Motivation + +Scheduling pods with setting appropriate resource requirements is truly hard to follow. Underestimating requests can +bring performance issues. However, overvaluing requests is likely to cause resource waste and low efficiency. One +common approach is using Vertical Pod Autoscaler (VPA) to autopilot the resource requirements for the pods of the same +workload. The VPA optimizes the resource requirements of the pod according to the pod metrics of the same workload. It +estimates the pod usage and specifies proper resource requirements. It works well when we want to optimize the resource +requirements of workloads. However, most VPA approaches try to abandon the time series attribute from the pod metrics +and generate a relatively static requests/limits that should guarantee to make no bad ignoring the timing. It leaves +the usage-to-limit gap, i.e. the gap between the recommended pod request with the real-time pod usage, and the +well-known pooling effect, i.e. the gap between the sum of the pod usages with the node usage. Inspired by +[Google's work](#references) in the EuroSys'21, we propose the node prediction in Koordinator to conquer these two +gaps. 
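+
+As an illustrative example of the pooling effect (the numbers are made up for clarity, not measured data), suppose Pods A
+and B each request 4 CPUs: A peaks at 4 CPUs during the day and drops to 1 CPU at night, while B does the opposite.
+Summing the per-pod peaks suggests the node needs 8 CPUs, yet the node usage never exceeds 5 CPUs:
+
+```
+sum of per-pod peaks = 4 + 4 = 8 CPUs
+node peak            = max(4 + 1, 1 + 4) = 5 CPUs
+pooling-effect gap   = 8 - 5 = 3 CPUs
+```
+
+Predicting at the node level, instead of summing per-pod estimates, is what allows this gap to be reclaimed safely.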
+ +### Goals + +- Define the node prediction API. +- Propose an online history-based-optimized (HBO) prediction model. +- Clarify how the Mid-tier resources are calculated with the prediction. + +### Non-Goals/Future Work + +- Propose a time-series-forecasting-based or offline prediction model. + +## User Stories + +### Story 1 + +As a cluster administrator, there are many web service pods allocating almost node resources. Whereas, the node +utilization is low since most allocated resources are not actually used. To improve node utilization, I want to reclaim +the unused resources to submit some low-priority online-service pods and Flink jobs. However, I am concerned with the +risks of over-utilization bringing machine overload which may cause the performance degradation and hurt the pod QoS. + +### Story 2 + +As a Kubernetes developer, I want to support the long-term load balancing in the scheduler. Thus, I need the information +that which nodes should be idle for a long time. + +## Design + +### Design Principles + +- The node prediction is low-cost and can be implemented in the Koordlet. +- The node prediction is pluggable. Users can replace the default model to customize the prediction. + +### Architecture + +The node prediction is implemented mainly in the Koordlet and Koord-Manager. The architecture is as below: + +![image](/img/node-prediction.svg) + +- Koordlet: The agent runs on the node. It implements the metrics collection, metrics storage, and predict server. + - Metrics Advisor: It collects the cpu/memory usage of the node and running pods. It stores the collected metrics in the Metric Cache. + - Metric Cache: It stores the node and pod metrics in a TSDB, which allows other modules to query the metrics later. + - Predict Server: With the node and pod metrics retrieved from the Metric Cache, it calculates and checkpoints the predicted result based on the prediction model. + - States Informer: It maintains the metadata of the node and the pods. It also reports the latest prediction periodically to the kube-apiserver. +- Koord-Manager: The controller runs on a master node. + - Configuration delivery: It maintains the prediction and colocation strategies and distributes the node strategy onto the NodeMetric. + - Resource Calculator: It fetches the node prediction result, and calculates the resource allocatable of the reclaimed resources (i.e. Mid-tier resource). +- Koord-Scheduler: It schedules the pod with different priority bands (e.g. Prod, Mid, Batch). It can enable load-aware scheduling to balance the over-committed nodes' utilization. + +#### Workflow + +In the koordlet, stages to update the node prediction are as follows: + +1. Histogram initialization: The predict server initializes a set of histograms for CPU and memory. For implementing `N-Sigma_v1`, it initializes decayed histograms only for the node and priority classes. While implementing `N-Sigma_v2`, it initializes histograms both for the node and every running pod. +2. Metrics collection: The metrics advisor collects the usage statistics of node and pods and stores them as metric points into the metric cache every CollectInterval (e.g. 1s). +3. Histogram updating: The predict server fetches the node metrics and pod metrics of latest HistogramUpdateInterval (e.g. 30s). Then it uses the aggregated result to update the decayed histograms. +4. Periodical reporting: The states informer fetches node metrics and the last histograms for the node and priority classes every ReportingInterval (e.g. 60s). 
Then it reports the complete NodeMetric status with last node prediction info to the kube-apiserver. +5. Fast reporting: The states informer fetches the last histograms every CheckPredictionInterval (e.g. 20s). It checks if the predicted result is too small or too larger than the last updated prediction exceeding the ResourceDiffThreshold (e.g. 5%), or the updated duration is longer than ForceUpdateInterval (e.g. 600s). If the check result is true, It updates the latest node prediction to the kube-apiserver. + +In the koord-manager, stages to update the Mid-tier resources allocatable are as follows: + +1. NodeMetric lifecycle management: The koord-manager list-watches the Node and the ConfigMap slo-controller-config, and maintains the lifecycle of the NodeMetric CR. Once the colocation strategy in the slo-controller-config updated, the koord-manager parses the config data and updates the node prediction policy and mid colocation policy into the NodeMetric.Spec. +2. Mid resource updating: The koord-manager list-watches the NodeMetric. Once the NodeMetric status is updated, the koord-manager gets the latest node metrics and node prediction, and calculates the Mid allocatable resources based on the Mid over-commitment formula. Finally, it updates the Mid allocatable resources into the Node status as the extended resources (`kubernetes.io/mid-cpu`, `kubernetes.io/mid-memory`). + +#### Scheduling Optimization + +The results of the node prediction on the NodeMetric, the Mid extended resources on the Node and the scheduling Pod +in the scheduler are updated in different time. It is inevitable to find that the scheduler schedules a pod with an +older version of the node prediction, which may cause the schedule result "lagged". + +To relief the lagged prediction, the koordlet and koord-manager try both updating earlier when the +prediction/NodeMetric differs from the previous result than a threshold and set a resource buffer which should +tolerant most of the result changes between synchronizations. + +For the worst case in which the prediction could be lagged too much (e.g. 1 hour), we can maintain a lower bound of +the real Mid allocatable resources inside the scheduler. This part is not planned in the first version of the Mid-tier +over-commitment. + +### API + +#### Node Prediction + +##### Predict Policy + +```go +// ColocationStrategy defines the colocation strategy in slo-controller-config ConfigMap. +type ColocationStrategy struct { + // ... + NodePredictPolicy *slov1alpha1.PredictPolicy `json:"nodePredictPolicy,omitempty"` +} + +type NodeMetricSpec struct { + // ... + PredictPolicy *PredictPolicy `json:"predictPolicy,omitempty"` +} + +// PredictPolicy defines the policy for the node prediction. +type PredictPolicy struct { + ResourceDiffThresholdPercent *int64 `json:"resourceDiffThresholdPercent,omitempty"` + ColdStartPeriodSeconds *int64 `json:"coldStartPeriodSeconds,omitempty"` +} +``` + +##### Predicted Result + +```go +type NodeMetricStatus struct { + // ... + // ProdReclaimableMetric is the estimated reclaimable resources for the Prod-type pods. + ProdReclaimableMetric *ReclaimableMetric `json:"prodReclaimableMetric,omitempty"` +} + +type ReclaimableMetric struct { + // Resource is the resource usage of the prediction. + Resource ResourceMap `json:"resource,omitempty"` +} +``` + +#### Mid Overcommitment + +##### Colocation Strategy + +```go +type ColocationStrategy struct { + // ... 
+ // MidCPUThresholdPercent defines the maximum percentage of the Mid-tier cpu resource dividing the node allocatable. + // MidCPUAllocatable <= NodeCPUAllocatable * MidCPUThresholdPercent / 100. + MidCPUThresholdPercent *int64 `json:"midCPUThresholdPercent,omitempty" validate:"omitempty,min=0,max=100"` + // MidMemoryThresholdPercent defines the maximum percentage of the Mid-tier memory resource dividing the node allocatable. + // MidMemoryAllocatable <= NodeMemoryAllocatable * MidMemoryThresholdPercent / 100. + MidMemoryThresholdPercent *int64 `json:"midMemoryThresholdPercent,omitempty" validate:"omitempty,min=0,max=100"` +} +``` + +##### Extended Resources + +```yaml +apiVersion: v1 +kind: Node +metadata: + name: test-node +status: + allocatable: + cpu: '32' + memory: 129636240Ki + pods: '213' + kubernetes.io/mid-cpu: '16000' # allocatable cpu milli-cores for Mid-tier pods + kubernetes.io/mid-memory: 64818120Ki # allocatable memory bytes for Mid-tier pods + capacity: + cpu: '32' + memory: 129636240Ki + pods: '213' + kubernetes.io/mid-cpu: '16000' + kubernetes.io/mid-memory: 64818120Ki +``` + +### Theoretical Model + +#### Node Peak Prediction + +Before elaborating the peak prediction algorithm, let's formalize the node peak prediction problem. + +Let's denote the usage of a Pod `p` at the time `t` is `U(p, t)`. + +Then the usage of a Node `M` which schedules a set of Pods is `MU(Pods, t) = sum[p in Pods](U(p, t))`. + +> Note that the non-Pod usage of the node can be regarded as the usage of a special pod `S`. + +When we want to predict the node peak at the time `T`, we are calculating +`Peak(Pods, T) = max[t >= T](sum[p in Pods](U(p, t)))`. + +The predicted peak `Peak(Pods, T)` is our node prediction result at `T`. + +#### N-sigma Prediction + +There are several [statistical peak prediction models](#alternatives) which are practical to implement in the online +scheduler. [*N-sigma*](#references) is the picked peak prediction model in the current implementation. It assumes the +timing node metrics follow the Gaussian distribution, which allows us to estimate the node peak with the mean and +standard deviation (stdev): + +`Peak_N-Sigma_v1(Pods, T) = mean[T0 <= t <= T](MU(Pods, t)) + N * stdev[T0 <= t <= T](MU(Pods, t))` + +The `Peak_N-Sigma_v1` is the predicted node peak. It is implemented as the first version of node prediction, which is +calculated based on node-level metrics. + +Moreover, we can calculate with the pods' metrics: + +`Peak_Pods-N-Sigma'(Pods, T) = sum[p in Pods](mean[T0 <= t <= T](U(p, t)) + N * stdev[T0 <= t <= T](U(p, t)))` + +A more conservative is derived from their maximal. The `Peak_N-sigma_v2` is the second version of node prediction, +which also considers the pod-level metrics. + +`Peak_N-Sigma_v2(Pods, T) = max(Peak_N-Sigma_v1(Pods, T), Peak_Pods-N-Sigma(Pods, T))`. + +#### Mid-tier Overcommitment + +In the first version, the Mid-tier resource contains the reclaimable resources which are probably unused in the +long-term by the high-priority (i.e. Prod) pods. +The resource calculation for the Mid-tier resources can be described as follows: + +``` +Allocatable[Mid] := min(Reclaimable[Mid], NodeAllocatable * thresholdRatio) +``` + +- `Reclaimable[Mid] := max(0, reclaimRatio * Allocated[Prod] - Peak[Prod])`. The peak prediction model is used for estimating the future usage of the running Prod pods. The Mid pods can allocate a proportion of reclaimed resources from running Prod pods. 
+- `NodeAllocatable * thresholdRatio` is the maximal co-located Mid-tier resource setting from a ratio of the node allocatable. + +In next versions, the Mid-tier resource is planned to mix with the default node allocatable (i.e. the Prod allocatable), +which means a Mid pod can allocate the unallocated node allocatable resource, and an idle node is able to schedule Mid +pods. The Prod pods can preempt the Mid pods when the mixed allocatable is exhausted by the Mid pods, so that the +Prod-tier resource is still more stable and guaranteed than the Mid-tier. +Then the resource calculation for the mixed Mid-tier resources can be described as follows: + +``` +Allocatable[Mid]' := min(Reclaimable[Mid], NodeAllocatable * thresholdRatio) + Unallocated[Mid] +Unallocated[Mid] = max(NodeAllocatable - Allocated[Prod], 0) +``` + +## Alternatives + +### Peak Prediction Models + +There are several different peak prediction and time series forecasting models which can estimate the future peak +based on the historical node metrics, including statistical methods and machine learning methods. In this proposal, +statistical peak prediction models are preferred since they are practical to implement in the online scheduling system, +have less overhead of metrics collection than the ML approaches, and more simple to analyze and debug. + +Here are some common statistical peak prediction models: + +1. [Borg-default](#references) + +Borg-default simply over-commits the machine resources in a fixed rate `a`, which means the peak usage is regarded as +the result of the requests dividing `a`. + +Let's denote the resource request of the Pod `p` at the time `t` is `R(p, t)`, where `R(p, t) = 0` when `p` is not +running. Then we have, + +`Peak_Borg-default(Pods, T) = 1/a * sum[p in Pods](R(p, T))`, `a = 1.1` by default. + +2. [Resource Central](#references) + +Resource Central considers the peak of the machine as the sum of the peak of individual pods (or VMs). And a simple +peak prediction of a pod is the percentile of the historical usages, e.g. `percentile[t in [T-C, T]](U(p, t))`. + +`Peak_ResourceCentral(Pods, T) = sum[p in Pods](percentile[t in [T-C, T]](U(p, t)))` + +3. [Max](#references) + +The Max prediction model does not use the historical metrics directly, but takes the maximal of any known peak results. +It gets the more conservative result than the input models. For example, we have a `Max_Borg-default_ResourceCentral` +model calculated from the Borg-default and Resource Central models: + +`Peak_Max_Borg-default_ResourceCentral(Pods, T) = max(Peak_Borg-default(Pods, T), Peak_ResourceCentral(Pods, T))` + +## References + +1. Vertical Pod Autoscaler: https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler +2. Bashir, Noman, et al. "Take it to the limit: peak prediction-driven resource overcommitment in datacenters." Proceedings of the Sixteenth European Conference on Computer Systems. 2021. +3. Cortez, Eli, et al. "Resource central: Understanding and predicting workloads for improved resource management in large cloud platforms." Proceedings of the 26th Symposium on Operating Systems Principles. 2017. diff --git a/versioned_docs/version-v1.4/designs/nri-mode-resource-management.md b/versioned_docs/version-v1.4/designs/nri-mode-resource-management.md new file mode 100644 index 000000000..f7e45e5ec --- /dev/null +++ b/versioned_docs/version-v1.4/designs/nri-mode-resource-management.md @@ -0,0 +1,152 @@ +# NRI Mode Resource Management + +## Glossary + +NRI, node resource interface. 
See: https://github.com/containerd/nri + +## Summary + +We hope to enable NRI mode resource management for koordinator for easy deployment and in-time control. + +## Motivation + +Koordinator as a QoS-based scheduling for efficient orchestration of microservices, AI, and big data workloads on Kubernetes and its runtime hooks support two working [modes](https://github.com/koordinator-sh/koordinator/blob/main/docs/design-archive/koordlet-runtime-hooks.md) for different scenarios: `Standalone` and `Proxy`. However, both of them have some [constraints](https://shimo.im/docs/m4kMLdgO1LIma9qD). NRI (Node Resource Interface), which is a public interface for controlling node resources is a general framework for CRI-compatible container runtime plug-in extensions. It provides a mechanism for extensions to track the state of pod/containers and make limited modifications to their configuration. We'd like to integrate NRI framework to address `Standalone` and `Proxy` constraints based on this community recommend mechanism. + +### Goals + +- Support NRI mode resource management for koordinator. +- Support containerd container runtime. + +### Non-Goals/Future Work + +- Support docker runtime + +## Proposal + +Different from standalone and proxy mode, Koodlet will start an NRI plugin to subscribe pod/container lifecycle events from container runtime (e.g. containerd, crio), and then koordlet NRI plugin will call runtime hooks to adjust pod resources or OCI spec. The flow should be: + +- Get pod/container lifecycle events and OCI format information from container runtime (e.g. containerd, crio). +- Transform the OCI format information into internal protocols. (e.g. PodContext, ContainerContext) to re-use existing runtime hook plugins. +- Transform the runtime hook plugins' response into OCI spec format +- Return OCI spec format response to container runtime(e.g. containerd, crio). + +![nri-proposal.png](/img/nri-proposal.png) + +### User Stories + +#### Story 1 +As a cluster administrator, I want to apply QoS policy before pod's status become running. + +#### Story 2 +As a cluster administrator, I want to deploy koordinator cluster without restart. + +#### Story 3 +As a cluster administrator, I want to adjust resources' policies at runtime. + +#### Story 4 +As a GPU user, I want to inject environment before pod running. + +### Requirements + +- Need to upgrade containerd to >= 1.7.0, crio to >= v1.25.0 + +#### Functional Requirements + +NRI mode should support all existing functionalities supported by standalone and Proxy mode. + +#### Non-Functional Requirements + +Non-functional requirements are user expectations of the solution. Include +considerations for performance, reliability and security. + +### Implementation Details/Notes/Constraints +1. koordlet [NRI plugin](https://github.com/containerd/nri/blob/main/plugins/template/plugin.go) +```go +type nriServer struct { + stub stub.Stub + mask stub.EventMask + options Options // server options +} + +// Enable 3 hooks (RunPodSandbox, CreateContainer, UpdateContainer) in NRI +func (p *nriServer) Configure(config, runtime, version string) (stub.EventMask, error) { +} + +// Sync all pods/containers information before koordlet nri plugin run +func (p *nriServer) Synchronize(pods []*api.PodSandbox, containers []*api.Container) ([]*api.ContainerUpdate, error) { +} + +func (p *nriServer) RunPodSandbox(pod *api.PodSandbox) error { + podCtx.FromNri(pod) + RunHooks(...) 
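+    // Sketch of the intended flow: the hooks above adjust the shared PodContext,
+    // and NriDone() then applies the accumulated QoS settings for this pod sandbox.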
+ podCtx.NriDone() +} + +func (p *nriServer) CreateContainer(pod *api.PodSandbox, container *api.Container) (*api.ContainerAdjustment, []*api.ContainerUpdate, error) { + containerCtx.FromNri(pod, container) + RunHooks(...) + containCtx.NriDone() +} + +func (p *nriServer) UpdateContainer(pod *api.PodSandbox, container *api.Container) ([]*api.ContainerUpdate, error) { + containerCtx.FromNri(pod, container) + RunHooks(...) + containCtx.NriDone() +} +``` +2. koordlet enhancement for NRI +- PodContext +```go +// fill PodContext from OCI spec +func (p *PodContext) FromNri(pod *api.PodSandbox) { +} + +// apply QoS resource policies for pod +func (p *PodContext) NriDone() { +} +``` +- ContainerContext +```go +// fill ContainerContext from OCI spec +func (c *ContainerContext) FromNri(pod *api.PodSandbox, container *api.Container) { +} + +// apply QoS resource policies for container +func (c *ContainerContext) NriDone() (*api.ContainerAdjustment, []*api.ContainerUpdate, error) { +} +``` + +### Risks and Mitigations + +## Alternatives +There are several approaches to extending the Kubernetes CRI (Container Runtime Interface) to manage container resources such as `standalone` and `proxy`. Under `standalone` running mode, resource isolation parameters will be injected asynchronously. Under `proxy` running mode, proxy can hijack CRI requests from kubelet for pods and then apply resource policies in time. However, `proxy` mode needs to configure and restart kubelet. + +There are a little difference in execution timing between `NRI` and `proxy` modes. Hook points (execution timing) are not exactly same. The biggest difference is `proxy` call koordlet hooks between kubelet and containerd. However, NRI will call NRI plugin (koodlet hooks) in containerd, that means containerd still could do something before or after containerd call NRI plugin (koordlet hooks). For example, under `NRI` running mode, containerd setup pod network first and then call NRI plugin (koordlet hooks) in RunPodSanbox, but under `proxy` running mode, containerd couldn't do anything before koordlet hooks running when `proxy` handle RunPodSandbox CRI request. + +- Standalone + + - kubelet -- CRI Request -> CRI Runtime -- OCI Spec -> OCI compatible runtime -> containers + - kubelet -> Node Agent -> CRI Runtime / containers + +![standalone.png](/img/standalone.png) + +- Proxy + + - kubelet -- CRI Request -> CRI Proxy -- CRI Request (hooked) -> CRI Runtime -- OCI Spec -> OCI compatible runtime -> containers + +![proxy.png](/img/proxy.png) + +- NRI + + - kubelet -- CRI Request -> CRI Runtime -- OCI Spec --> OCI compatible runtime -> containers +                  ↘   ↗ +                Koordlet NRI plugin + +![nri.png](/img/nri.png) + +## Upgrade Strategy + +- Need to upgrade containerd to 1.7.0+ or CRIO to 1.26.0+ +- Need to enable NRI + + diff --git a/versioned_docs/version-v1.4/designs/pod-migration-job.md b/versioned_docs/version-v1.4/designs/pod-migration-job.md new file mode 100644 index 000000000..47a94aba8 --- /dev/null +++ b/versioned_docs/version-v1.4/designs/pod-migration-job.md @@ -0,0 +1,374 @@ +# PodMigrationJob + +## Summary + +This proposal defines a CRD-based Pod migration API, through which the descheduler or other automatic fault recovery components can evict or delete Pods more safely. At the same time, the proposal also describes the specific implementation details of the API. 
+
+## Motivation
+
+Migrating Pods is an important capability that many components (such as deschedulers) rely on, and it can be used to
+optimize scheduling or help resolve workload runtime quality issues. We believe that Pod migration is a complex process,
+involving steps such as auditing, resource allocation, and application startup, and it is mixed with application
+upgrades, scaling scenarios, and resource operation and maintenance actions by cluster administrators. Therefore, how to
+manage the stability risk of this process, so that applications do not fail because their Pods are migrated, is a very
+critical issue that must be resolved.
+
+Therefore, it is necessary to realize a final-state-oriented migration capability based on a CRD, track the status of
+each step in the migration, and perceive scenarios such as upgrading and scaling of the application.
+
+### Goals
+
+1. Define a CRD-based Pod Migration Job API, through which the descheduler can evict or delete Pods more safely.
+2. Describe the design details behind the API.
+
+### Non-Goals/Future Work
+
+1. A new descheduler framework
+2. Descheduling capabilities for different scenarios such as load-aware descheduling, defragmentation, etc.
+3. The details of deterministic preemption that preempts other Pods for a Reservation.
+
+## Proposal
+
+### User Stories
+
+#### Story 1
+
+The descheduler in the K8s community evicts pods to be rescheduled according to different strategies. However, it does
+not guarantee that the evicted Pod will have resources available after re-creation. If a large number of newly created
+Pods stay in the Pending state while cluster resources are tight, application availability may be lowered.
+
+#### Story 2
+
+The descheduler evicts the Pod through the Eviction API, and the Eviction API decides whether to delete the Pod
+according to the PDB status. However, it cannot perceive workload upgrades, scaling, and other scenarios in which Pods
+are deleted, which also brings safety risks.
+
+#### Story 3
+
+The Pod migration capability itself can be provided to users as a service. Users can integrate this API into their own
+systems to achieve safe migration, and are no longer limited to deschedulers.
+
+
+### Basic Migration API
+
+These APIs provide cluster administrators with more fine-grained migration control capabilities, which can better reduce risks.
+
+- `scheduling.koordinator.sh/eviction-cost` indicates the eviction cost. It can be set to any int32 value. The implicit eviction cost for Pods that do not set the annotation is 0, and negative values are permitted. If the cost is set to `math.MaxInt32`, the Pod will not be evicted. Pods with lower eviction cost are preferred to be evicted before Pods with higher eviction cost. If a batch of Pods to be evicted have the same priority, they are sorted by cost, and the Pod with the smallest cost is evicted first. Although the K8s community has [Pod Deletion Cost #2255](https://github.com/kubernetes/enhancements/issues/2255), it is not a general mechanism. To avoid conflicts with components that use `Pod Deletion Cost`, users can individually mark the eviction cost for Pods.
+
+
+### Pod Migration Job CRD
+
+In order to support the above user stories, a Custom Resource Definition (CRD) named `PodMigrationJob` is proposed to make the migration process safe.
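+
+Before walking through the spec, here is a minimal sketch of what a `PodMigrationJob` object could look like (the
+`apiVersion` and the Pod name below are illustrative assumptions; the authoritative fields are defined in the sections
+that follow):
+
+```yaml
+apiVersion: scheduling.koordinator.sh/v1alpha1   # assumed group/version for illustration
+kind: PodMigrationJob
+metadata:
+  name: migrate-demo-pod
+spec:
+  mode: ReservationFirst       # reserve resources first, then evict the Pod
+  ttl: 5m                      # give up if the migration does not complete in time
+  podRef:                      # the Pod to be migrated
+    namespace: default
+    name: demo-app-5f9b977566-c7lvk
+```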
+ +#### Migration Job Spec + +```go + +// PodMigrationJob is the Schema for the PodMigrationJob API +// +k8s:openapi-gen=true +// +kubebuilder:resource:scope=Cluster +type PodMigrationJob struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + Spec PodMigrationJobSpec `json:"spec,omitempty"` + Status PodMigrationJobStatus `json:"status,omitempty"` +} + +type PodMigrationJobSpec struct { + // Paused indicates whether the PodMigrationJob should to work or not. + // Default is false + // +optional + Paused bool `json:"paused,omitempty"` + + // TTL controls the PodMigrationJob timeout duration. + // +optional + TTL *metav1.Duration `json:"ttl,omitempty"` + + // Mode represents the operating mode of the Job + // Default is PodMigrationJobModeReservationFirst + // +optional + Mode PodMigrationJobMode `json:"mode,omitempty"` + + // PodRef represents the Pod that be migrated + // +required + PodRef *corev1.ObjectReference `json:"podRef"` + + // ReservationOptions defines the Reservation options for migrated Pod + // +optional + ReservationOptions *PodMigrateReservationOptions `json:"reservationOptions,omitempty"` + + // DeleteOptions defines the deleting options for the migrated Pod and preempted Pods + // +optional + DeleteOptions *metav1.DeleteOptions `json:"deleteOptions,omitempty"` +} + +type PodMigrationJobMode string + +const ( + PodMigrationJobModeReservationFirst PodMigrationJobMode = "ReservationFirst" + PodMigrationJobModeEvictionDirectly PodMigrationJobMode = "EvictDirectly" +) + +type PodMigrateReservationOptions struct { + // ReservationRef if specified, PodMigrationJob will check if the status of Reservation is available. + // ReservationRef if not specified, PodMigrationJob controller will create Reservation by Template, + // and update the ReservationRef to reference the Reservation + // +optional + ReservationRef *corev1.ObjectReference `json:"reservationRef,omitempty"` + + // Template is the object that describes the Reservation that will be created if not specified ReservationRef + // +optional + Template *ReservationTemplateSpec `json:"template,omitempty"` + + // PreemptionOption decides whether to preempt other Pods. + // The preemption is safe and reserves resources for preempted Pods. + // +optional + PreemptionOptions *PodMigrationJobPreemptionOptions `json:"preemptionOptions,omitempty"` +} + +type PodMigrationJobPreemptionOptions struct { + // Reserved object. +} +``` + +- `Paused` indicates whether the PodMigrationJob should to work or not. In some scenarios, the user does not expect the PodMigrationJob Controller to process the PodMigrationJob immediately, but rather to decide whether to execute it after completing some operations similar to auditing. +- `TimeoutInSeconds` controls the PodMigrationJob timeout duration. +- The `PodMigrationJob` support two modes defined by the field `Mode`: + - `PodMigrationJobModeReservationFirst` means that before migrating a Pod, try to reserve resources through the `Reservation` API, delete the Pod to be migrated after successfully reserved, and observe the status of the `Reservation` to ensure that the `Reservation` is consumed. + - `PodMigrationJobModeEvictionDirectly` indicates that the user clearly knows the risk of evicting the Pod and decides to evict the Pod directly. + - If `Mode` is not specified, `PodMigrationJobModeReservationFirst` is used by default +- `PodRef` represents the Pod that be migrated. The field is required. 
+- `ReservationOptions` defines options for how to reserve resource through `Reservation` API: + - `ReservationRef` if is specified, the referenced `Reservation` instance is used first. In some scenarios, such as defragmentation, in order to ensure the reliability of the upper-layer logic, resources may have been reserved on the target node. In this case, the specified `Reservation` can be used directly. + - `Template` describes the spec of `Reservation`. It is often not necessary to set this field. When neither `ReservationRef` nor `Template` is specified, the `PodMigrationJob controller` will construct the `ReservationSpec` reserved resources according to the Spec of the migrated Pod. If `Template` is set, the `ReservationTemplateSpec` and the Spec of the migrated Pod will be merged to construct the `ReservationSpec` reserved resources. + - `PreemptionOptions` decides whether to preempt other Pods if reserved resources failed. The specific details of preemption will be submitted in a separate proposal description in future work, and will not be expanded here for the time being. +- `DeleteOptions` defines the options of delete operation. Whether to delete a Pod through the `K8s Delete API` or evict a Pod through the `K8s Eviction API` depends on how the user configures the parameters of the `PodMigrationJob Controller`. Users only need to set `DeleteOptions` according to the workload in their own cluster. + +#### Migration Job Status + +```go +type PodMigrationJobStatus struct { + // PodMigrationJobPhase represents the phase of a PodMigrationJob is a simple, high-level summary of where the PodMigrationJob is in its lifecycle. + // e.g. Pending/Running/Failed + Phase PodMigrationJobPhase `json:"phase,omitempty"` + // Status represents the current status of PodMigrationJob + // e.g. ReservationCreated + Status string `json:"status,omitempty"` + // Reason represents a brief CamelCase message indicating details about why the PodMigrationJob is in this state. + Reason string `json:"reason,omitempty"` + // Message represents a human-readable message indicating details about why the PodMigrationJob is in this state. 
+ Message string `json:"message,omitempty"` + // Conditions records the stats of PodMigrationJob + Conditions []PodMigrationJobCondition `json:"conditions,omitempty"` + // NodeName represents the node's name of migrated Pod + NodeName string `json:"nodeName,omitempty"` + // PodRef represents the newly created Pod after being migrated + PodRef *corev1.ObjectReference `json:"podRef,omitempty"` + // PreemptedPodsRef represents the Pods that be preempted + PreemptedPodsRef []corev1.ObjectReference `json:"preemptedPodsRef,omitempty"` + // PreemptedPodsReservations records information about Reservations created due to preemption + PreemptedPodsReservations []PodMigrationJobPreemptedReservation `json:"preemptedPodsReservation,omitempty"` +} + +type PodMigrationJobPreemptedReservation struct { + // Namespace represents the namespace of Reservation + Namespace string `json:"namespace,omitempty"` + // Name represents the name of Reservation + Name string `json:"name,omitempty"` + // NodeName represents the assigned node for Reservation by scheduler + NodeName string `json:"nodeName,omitempty"` + // Phase represents the Phase of Reservation + Phase string `json:"phase,omitempty"` + // PreemptedPodRef represents the Pod that be preempted + PreemptedPodRef *corev1.ObjectReference `json:"preemptedPodRef,omitempty"` + // PodsRef represents the newly created Pods after being preempted + PodsRef []corev1.ObjectReference `json:"podsRef,omitempty"` +} + +type PodMigrationJobCondition struct { + // Type is the type of the condition. + Type PodMigrationJobConditionType `json:"type"` + // Status is the status of the condition. + // Can be True, False, Unknown. + Status PodMigrationJobConditionStatus `json:"status"` + // Last time we probed the condition. + // +nullable + LastProbeTime metav1.Time `json:"lastProbeTime,omitempty"` + // Last time the condition transitioned from one status to another. + // +nullable + LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty"` + // Unique, one-word, CamelCase reason for the condition's last transition. + Reason string `json:"reason,omitempty"` + // Human-readable message indicating details about last transition. + Message string `json:"message,omitempty"` +} + +type PodMigrationJobPhase string + +const ( + // PodMigrationJobPending represents the initial status + PodMigrationJobPending PodMigrationJobPhase = "Pending" + // PodMigrationJobRunning represents the PodMigrationJob is being processed + PodMigrationJobRunning PodMigrationJobPhase = "Running" + // PodMigrationJobSucceed represents the PodMigrationJob processed successfully + PodMigrationJobSucceed PodMigrationJobPhase = "Succeed" + // PodMigrationJobFailed represents the PodMigrationJob process failed caused by Timeout, Reservation failed, etc. + PodMigrationJobFailed PodMigrationJobPhase = "Failed" + // PodMigrationJobAborted represents the user forcefully aborted the PodMigrationJob. + PodMigrationJobAborted PodMigrationJobPhase = "Aborted" +) + +// These are valid conditions of PodMigrationJob. 
+const ( + PodMigrationJobConditionReservationCreated PodMigrationJobConditionType = "ReservationCreated" + PodMigrationJobConditionReservationScheduled PodMigrationJobConditionType = "ReservationScheduled" + PodMigrationJobConditionPreemption PodMigrationJobConditionType = "Preemption" + PodMigrationJobConditionEviction PodMigrationJobConditionType = "Eviction" + PodMigrationJobConditionPodScheduled PodMigrationJobConditionType = "PodScheduled" + PodMigrationJobConditionReservationPodBoundReservation PodMigrationJobConditionType = "PodBoundReservation" + PodMigrationJobConditionReservationBound PodMigrationJobConditionType = "ReservationBound" +) + +// These are valid reasons of PodMigrationJob. +const ( + PodMigrationJobReasonTimeout = "Timeout" + PodMigrationJobReasonFailedCreateReservation = "FailedCreateReservation" + PodMigrationJobReasonUnschedulable = "Unschedulable" + PodMigrationJobReasonMissingPod = "MissingPod" + PodMigrationJobReasonMissingReservation = "MissingReservation" + PodMigrationJobReasonPreempting = "Preempting" + PodMigrationJobReasonPreemptComplete = "PreemptComplete" + PodMigrationJobReasonEvicting = "Evicting" + PodMigrationJobReasonFailedEvict = "FailedEvict" + PodMigrationJobReasonEvictComplete = "EvictComplete" + PodMigrationJobReasonWaitForPodBindReservation = "WaitForPodBindReservation" +) + +type PodMigrationJobConditionStatus string + +const ( + PodMigrationJobConditionStatusTrue PodMigrationJobConditionStatus = "True" + PodMigrationJobConditionStatusFalse PodMigrationJobConditionStatus = "False" + PodMigrationJobConditionStatusUnknown PodMigrationJobConditionStatus = "Unknown" +) +``` + +### Implementation Details/Notes/Constraints + +#### PodMigrationJob Controller + +The difference between `PodMigrationJobController` and general controller is that `PodMigrationJobController` will evaluate all pending PodMigrationJobs together (ie PodMigrationJob.Phase is Pending) and select a batch of PodMigrationJob and reconcile them. This selection process is called the arbitration mechanism. The reason why the arbitration mechanism is introduced is mainly to control the stability risk and control the cost of migrating Pods. The arbitration mechanism includes three stages: `Group`, `Filter` and `Sort`. + +##### Group PodMigrationJob + +Aggregate according to different workloads to facilitate the processing of subsequent processes + +- Aggregate PodMigrationJob by workload +- Aggregate PodMigrationJob by Node +- Aggregate PodMigrationJob by Namespace + +##### Filter PodMigrationJob + +- Check how many PodMigrationJob of each workload are in the Running state, and record them as ***migratingReplicas***. If the ***migratingReplicas*** reach a certain threshold, they will be excluded. The detailed algorithm of this threshold is described later. +- Check the number of ***unavailableReplicas*** of each workload, and determine whether the ***unavailableReplicas + migratingReplicas*** conform to the corresponding [PDB(Pod Disruption Budget)](https://kubernetes.io/docs/tasks/run-application/configure-pdb/) or [PUB(Pod Unavailable Budget)](https://openkruise.io/docs/user-manuals/podunavailablebudget). If there is no PDB or PUB, use the algorithm to calculate dynamically. If not, exclude the corresponding PodMigrationJob. +- Check the number of Pods being migrated on the node where each target Pod is located. If it exceeds the maximum migration amount for a single node, exclude it. +- Check the number of Pods being migrated in the Namespace where each target Pod is located. 
If it exceeds the maximum migration amount for a single Namespace, exclude it + +The detailed algorithm of Workload Max Migrating/Unavailable Replicas: + +```go +func GetMaxMigrating(replicas int, intOrPercent *intstr.IntOrString) (int, error) { + return GetMaxUnavailable(replicas, intOrPercent) +} + +func GetMaxUnavailable(replicas int, intOrPercent *intstr.IntOrString) (int, error) { + if intOrPercent == nil { + if replicas > 10 { + s := intstr.FromString("10%") + intOrPercent = &s + } else if replicas >= 4 && replicas <= 10 { + s := intstr.FromInt(2) + intOrPercent = &s + } else { + s := intstr.FromInt(1) + intOrPercent = &s + } + } + return intstr.GetValueFromIntOrPercent(intOrPercent, replicas, true) +} +``` + +##### Sort PodMigrationJob + +- Pods with higher QoS requirements are given priority, LSE > LSR > LS > BE +- Pods with higher priority will be processed first +- The higher migration priority will be processed first +- If the Pod has already initiated a migration job in the past and it fails, sort by the number of times. The lower the number of times, the priority will be given to processing +- If the workload where the Pod is located has been descheduled for a certain number of times in the past, it is sorted according to the number of times. The lower the number of times, the priority will be processed. +- Sort by the number of replicas being migrated by the workload. The lower the number of replicas being migrated, the priority will be given to processing. + +##### Execute PodMigrationJob + +- Update PodMigrationJobStatus.Phase to Running to trigger the PodMigrationJob controller reconcile these jobs +- PodMigrationJob controller reconciles process: + - If the mode of PodMigrationJob is `EvictionDirectly`, just delete the Pod through the delete method that configured in PodMigrationJob controller. And update the phase of PodMigrationJob to Success. + - If not specified ReservationOptions.ReservationRef, create the Reservation instance by the reservation template or Pod spec to reserve resources. And updates the created Reservation instance to the ReservationOptions.ReservationRef. + - Check the status of Reservation to determine whether reserve resource successfully. + - If failed to reserve, abort the PodMigrationJob and update the phase of PodMigrationJob to Fail + - If successfully reserve, delete the Pod through the delete method that configured in PodMigrationJob controller. + - Check the Reservation status to determine whether the Reservation consumed. + - If Reservation consumed, tracks the status of Reservation and update the status to PodMigrationJob + - Update phase of PodMigrationJob to Success. + +##### Migration Stability mechanism + +- Support for disabling this capability by configuration +- Supports a simple central flow control mechanism to limit the number of migrations over a period of time. + +See the Configuration section for more details + +#### Controller Configuration + +User can configure the `MigrationControllerArgs` through Koordinator Descheduler ConfigMap. + +```go +// MigrationControllerArgs holds arguments used to configure the MigrationController +type MigrationControllerArgs struct { + metav1.TypeMeta + + // DryRun means only execute the entire migration logic except create Reservation or Delete Pod + // Default is false + DryRun bool `json:"dryRun,omitempty"` + + // EvictFailedBarePods allows pods without ownerReferences and in failed phase to be evicted. 
+ EvictFailedBarePods bool `json:"evictFailedBarePods"` + + // EvictLocalStoragePods allows pods using local storage to be evicted. + EvictLocalStoragePods bool `json:"evictLocalStoragePods"` + + // EvictSystemCriticalPods allows eviction of pods of any priority (including Kubernetes system pods) + EvictSystemCriticalPods bool `json:"evictSystemCriticalPods"` + + // IgnorePVCPods prevents pods with PVCs from being evicted. + IgnorePvcPods bool `json:"ignorePvcPods"` + + // LabelSelector sets whether to apply label filtering when evicting. + // Any pod matching the label selector is considered evictable. + LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty"` + + // FlowControlQPS controls the number of arbitrations per second + FlowControlQPS string `json:"flowControlQPS,omitempty"` + // FlowControlBurst is the maximum number of tokens + FlowControlBurst int32 `json:"flowControlBurst,omitempty"` + + // MaxMigratingPerNode represents he maximum number of pods that can be migrating during migrate per node. + MaxMigratingPerNode *int32 `json:"maxMigratingPerNode,omitempty"` + + // MaxMigratingPerNamespace represents he maximum number of pods that can be migrating during migrate per namespace. + MaxMigratingPerNamespace *int32 `json:"maxMigratingPerNamespace,omitempty"` + + // MaxMigratingPerWorkload represents he maximum number of pods that can be migrating during migrate per workload. + // Value can be an absolute number (ex: 5) or a percentage of desired pods (ex: 10%). + MaxMigratingPerWorkload *intstr.IntOrString `json:"maxMigratingPerWorkload,omitempty"` + + // MaxUnavailablePerWorkload represents he maximum number of pods that can be unavailable during migrate per workload. + // The unavailable state includes NotRunning/NotReady/Migrating/Evicting + // Value can be an absolute number (ex: 5) or a percentage of desired pods (ex: 10%). + MaxUnavailablePerWorkload *intstr.IntOrString `json:"maxUnavailablePerWorkload,omitempty"` + + // EvictionPolicy represents how to delete Pod, support "Delete" and "Eviction", default value is "Eviction" + EvictionPolicy string `json:"evictionPolicy,omitempty"` + // DefaultDeleteOptions defines options when deleting migrated pods and preempted pods through the method specified by EvictionPolicy + DefaultDeleteOptions *metav1.DeleteOptions `json:"defaultDeleteOptions,omitempty"` +} + +``` \ No newline at end of file diff --git a/versioned_docs/version-v1.4/designs/resource-reservation.md b/versioned_docs/version-v1.4/designs/resource-reservation.md new file mode 100644 index 000000000..7fa73c84f --- /dev/null +++ b/versioned_docs/version-v1.4/designs/resource-reservation.md @@ -0,0 +1,245 @@ +# Resource Reservation + +## Summary + +A scheduling mechanism and its API is provided to reserve node resources for pods may not be created yet. + +## Motivation + +Pods are fundamental units for allocating node resources in Kubernetes, which bind resource requirements with business logic. The scheduler is not able to reserve node resources for specific pods or workloads. We may try using a [fake pod](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#how-can-i-configure-overprovisioning-with-cluster-autoscaler) to prepare resources by the preemption mechanism. However, fake pods can be preempted by any scheduled pods with higher priorities, which make resources get scrambled unexpectedly. + +In Koordinator, a resource reservation mechanism is proposed to enhance scheduling and especially benefits scenarios below: + +1. 
Preemption: Existing preemption does not guarantee that only preempting pods can allocate preempted resources. With a reservation, the scheduler should be able to "lock" resources preventing from allocation of other pods with the same or higher priority. +2. De-scheduling: For the descheduler, it is better to ensure sufficient resources with the reservation before pods get rescheduled. Otherwise, rescheduled pods may not be runnable anymore and make the belonging application disrupted. +3. Horizontal scaling: Using reservation to achieve more deterministic horizontal scaling. e.g. Submit a reservation and make sure it is available before scaling up replicas. +4. Resource Pre-allocation: Sometimes we want to pre-allocate node resources for future resource demands even if the resources are not currently allocatable. Reservation can help with this and it should make no physical cost. + +### Goals + +- Define the basic API of resource reservation for *Motivations<1,2,3>*, extensible for supporting *Motivation<4>* in the future. +- Provide a scheduler plugin that implements above reservation mechanism. + +### Non-Goals/Future Work + +- Detailed design of reservative preemption/descheduler/horizontal scaler/pre-allocation. +- Modify kubelet admission control for reservation objects. + +## Proposal + +### User Stories + +#### Story 1 + +As a Kubernetes developer, I want to enhance the current **preemption** mechanism since preempted resources may be allocated by pods other than the preemptor. The scheduler can create a reservation for the preempting pods, so the ownership of preempted resources can be guaranteed, making the preemption more reliable. + +#### Story 2 + +As a cluster administrator, I want to use **descheduler** to migrate pods that are placed abnormally to somewhere they could "live better" and fulfill orchestration requirements of the app. e.g. Move pods on a over-utilized node to idler nodes and bind CPUs of same NUMA node. Reservations can be created before rescheduling pods, helping ensure there are sufficient resources and well placement. + +#### Story 3 + +As an application administrator, I want to make the **horizontal scaling** of my app more deterministic by submitting reservations before a scale-up. Besides, I can also reserve resources after a scale-down for future demands. It is useful especially when we want a guaranteed scale-up of applications for the coming business peak. + +#### Story 4 + +As a cluster administrator, I want to **pre-allocate** node resources for future usage no matter whether they are available now or not. I want to allocate the future free resources but do not disrupt the running of scheduled pods. Reservation can be made to pre-allocate resources since it makes no physical cost to the node. It may be in a `Waiting` state. When there is enough space for the reservation, it will become `Available` for the owner pods' scheduling. + +### API + +In this section, a Custom Resource Definition (CRD) named `Reservation` is proposed to allow the scheduler to reserve node resources for specific pods. + +![image](/img/resource-reservation.svg) + +```go +// Reservation objects are non-namespaced. +// It can reserve resources for pods of any namespace. Any affinity/anti-affinity of reservation scheduling can be +// specified in the pod template. 
+type Reservation struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + Spec ReservationSpec `json:"spec,omitempty"` + Status ReservationStatus `json:"status,omitempty"` +} + +type ReservationSpec struct { + // Template defines the scheduling requirements (resources, affinities, images, ...) processed by the scheduler just + // like a normal pod. + // If the `template.spec.nodeName` is specified, the scheduler will not choose another node but reserve resources on + // the specified node. + Template *corev1.PodTemplateSpec `json:"template,omitempty"` + // Specify the owners who can allocate the reserved resources. + // Multiple owner selectors and ORed. + Owners []ReservationOwner `json:"owners,omitempty"` + // By default, the resources requirements of reservation (specified in `template.spec`) is filtered by whether the + // node has sufficient free resources (i.e. ReservationRequest < NodeFree). + // When `preAllocation` is set, the scheduler will skip this validation and allow overcommitment. The scheduled + // reservation would be waiting to be available until free resources are sufficient. + // NOTE: Not supported in v0.6. + PreAllocation bool `json:"preAllocation,omitempty"` + // Time-to-Live period for the reservation. + // `expires` and `ttl` are mutually exclusive. If both `ttl` and `expires` are not specified, a very + // long TTL will be picked as default. Set 0 to disable the expiration. + TTL *metav1.Duration `json:"ttl,omitempty"` + // Expired timestamp when the reservation expires. + // `expires` and `ttl` are mutually exclusive. Defaults to being set dynamically at runtime based on the `ttl`. + Expires *metav1.Time `json:"expires,omitempty"` +} + +type ReservationStatus struct { + // The `phase` indicates whether is reservation is waiting for process (`Pending`), available to allocate + // (`Available`) or timeout/expired to get cleanup (Failed). + Phase ReservationPhase `json:"phase,omitempty"` + // The `conditions` indicate the messages of reason why the reservation is still pending. + Conditions []ReservationCondition `json:"conditions,omitempty"` + // Current resource owners which allocated the reservation resources. + CurrentOwners []corev1.ObjectReference `json:"currentOwners,omitempty"` + // Name of node the reservation is scheduled on. + NodeName string `json:"nodeName,omitempty"` + // Resource reserved and allocatable for owners. + Allocatable corev1.ResourceList `json:"allocatable,omitempty"` + // Resource allocated by current owners. + Allocated corev1.ResourceList `json:"allocated,omitempty"` +} + +type ReservationOwner struct { + // Multiple field selectors are ANDed. + Object *corev1.ObjectReference `json:"object,omitempty"` + Controller *ReservationControllerReference `json:"controller,omitempty"` + LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty"` +} + +type ReservationControllerReference struct { + // Extend with a `namespace` field for reference different namespaces. + metav1.OwnerReference `json:",inline"` + Namespace string `json:"namespace,omitempty"` +} + +type ReservationPhase string + +const ( + // ReservationPending indicates the Reservation has not been processed by the scheduler or is unschedulable for + // some reasons (e.g. the resource requirements cannot get satisfied). + ReservationPending ReservationPhase = "Pending" + // ReservationAvailable indicates the Reservation is both scheduled and available for allocation. 
+ ReservationAvailable ReservationPhase = "Available" + // ReservationWaiting indicates the Reservation is scheduled, but the resources to reserve are not ready for + // allocation (e.g. in pre-allocation for running pods). + ReservationWaiting ReservationPhase = "Waiting" + // ReservationFailed indicates the Reservation is failed to reserve resources, due to expiration or marked as + // unavailable, which the object is not available to allocate and will get cleaned in the future. + ReservationFailed ReservationPhase = "Failed" +) + +type ReservationCondition struct { + Type ReservationConditionType `json:"type,omitempty"` + Status ConditionStatus `json:"status,omitempty"` + Reason string `json:"reason,omitempty"` + Message string `json:"message,omitempty"` + LastProbeTime metav1.Time `json:"lastProbeTime,omitempty"` + LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty"` +} + +type ReservationConditionType string + +const ( + ReservationConditionScheduled ReservationConditionType = "Scheduled" + ReservationConditionReady ReservationConditionType = "Ready" +) + +type ConditionStatus string + +const ( + ConditionStatusTrue ConditionStatus = "True" + ConditionStatusFalse ConditionStatus = "False" + ConditionStatusUnknown ConditionStatus = "Unknown" +) + +const ( + ReasonReservationScheduled = "Scheduled" + ReasonReservationUnschedulable = "Unschedulable" + ReasonReservationAvailable = "Available" + ReasonReservationExpired = "Expired" +) +``` + +### Implementation Details + +#### Reservation Plugin + +##### Schedule Reservations + +A `Reservation` object has its scheduling requirements like a pod. Ideally, A `Reservation` object should get processed directly by the scheduler like a pod. However, it can require a series of modifications on [scheduling framework](https://kubernetes.io/docs/concepts/scheduling-eviction/scheduling-framework/), losing the compatibility with standard kube-scheduler, kubelet, autoscaler, etc. In the reservation plugin, we fake one *reservation pod* for one `Reservation` inside the scheduler to fulfill general scheduling plugins (noderesources, nodeaffinity, tainttolerations, ...). The scheduling framework can handle `Reservation` objects by processing fake pods in both [scheduling cycle and binding cycle](https://kubernetes.io/docs/concepts/scheduling-eviction/scheduling-framework/#scheduling-cycle-binding-cycle). + +A fake pod inside the scheduler can construct the same affinity/anti-affinity constraints as owner pods, which may change the reservation result. To handle this problem, koord-scheduler extends the framework to skip check of pod affinity for existing reservations in the `Filter` phase. + +A reservation specified `PreAllocation` intends to pre-allocate resources on nodes. The scheduler will skip its filtering of node resources in the scheduling cycle. However, the scheduled reservation will be `Waiting` to be `Available` until there are enough resources to fulfill its requests. + +If all nodes are unscheduled for the reservation, the scheduler keeps its status as `Pending` and sets `Conditions` with the failure message. + +Once the scheduling decision has been made, the corresponding `Reservation` object is updated with a new status indicating whether the reservation succeeded or not. The fake pod does not expose to other components, and the kubelet without modification does not perceive a `Reservation` assigned. 
Fortunately, a `Reservation` does not need to be executable on the node, so existing containers can keep running as usual without additional admissions.
+
+If a reservation has set the `nodeName` (inside the `template` field), the scheduler is responsible for checking whether the node can fulfill the reservation, since the kubelet does not do admissions for the reservation.
+
+##### Allocate Reserved Resources
+
+We say a reservation is *allocatable* for a pod if:
+
+1. The reservation is available.
+2. The pod matches the reservation owner spec.
+3. There are sufficient free resources in the reservation to fulfill the pod.
+
+When the reservation plugin is enabled, the scheduler checks for every scheduling pod whether there are allocatable reservations on a node. With a `Score` plugin implemented, the scheduler prefers to schedule pods on nodes which have more allocatable reserved resources.
+
+When a pod is scheduled on a node with allocatable reservations, it allocates resources belonging to one of the reservations. To pick one of the reservations, we choose the one which can get the most reserved resources allocated (i.e. MostAllocated). The scheduler also annotates the pod with the reservation info.
+
+##### Expiration and Cleanup
+
+When a reservation has existed for longer than its `TTL` or `Expires`, the scheduler updates its status to `Expired`. Expired reservations are cleaned up by the scheduler with a custom garbage collection period.
+
+When a node is deleted, the available and waiting reservations on the node should be marked as `Failed` since they are not allocatable any more.
+
+#### Use Cases
+
+To generally reserve node resources, submit a `Reservation` and set the pod template in the field `spec.template`. Then the koord-scheduler will update this `Reservation` with the scheduling result and the resources will get reserved.
+
+To be more specific,
+
+- `spec.template` specifies the fundamental resource requirements of a reservation. The scheduler schedules the fake pod based on the template.
+- `spec.owners` specifies which kinds of pods can use the reservation.
+- `spec.ttl` and `expires` specify the expiration for the reservation.
+- `spec.preAllocation` indicates whether the scheduler should skip filtering nodes by the reservation's resource requirements. When it is set, pre-allocation of node resources is allowed, and the reservation will not become `Available` until there are sufficient free resources.
+- `status.phase` is marked as `Pending` when the Reservation is created, and as `Available` when the Reservation is successfully scheduled.
+- `status.conditions` shows why the reservation is unscheduled or failed.
+- When a Reservation is `Available` on the node, only the specified pods can allocate the reserved resources.
+
+##### Usage in Preemption
+
+The [Priority Preemption](https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption/#preemption) happens in the PostFilter phase, trying to make preemptor pods schedulable by evicting low-priority pods. When a pod succeeds in preemption, the pod `status` is patched with a *nominated node* where the scheduler did the eviction. However, the preemptor's nominated node is not always the same as the scheduled node, since the scheduler does not reserve resources for the preemptor.
+To ensure the preempted resources go to the preemptor, the scheduler can first create a reservation that sets both `owners` to the preemptor pod and the relevant affinity rules for reserving the resources of the preempted pods.
Then the scheduler evicts the pods, and the reservation becomes `Available` once the resources are released. Finally, the preemptor pods can get scheduled on the nodes with the preempted resources reserved.
+
+##### Usage in Descheduling
+
+Before a pod is rescheduled, the descheduler can create a reservation that sets `template` and `owners` for the candidate. When the reservation becomes `Available`, the descheduler can assign the pod to allocate the reserved resources. This solves the problem in which the rescheduled pod has already stopped on the old node but cannot run on the new node. Moreover, the descheduler can migrate resources between pods by setting the `preAllocation` field.
+
+##### Usage in Pre-allocation
+
+Reservations with `preAllocation` specified allow users to pre-allocate node resources from running pods. The `status.phase` of the reservation is set to `Waiting` until the resources are released, indicating that its availability is conditional. Once the referenced pods have terminated, the `phase` becomes `Available` for the owners, and the pre-allocation succeeds.
+
+### Risks and Mitigations
+
+A kubelet without any modification may ignore `Reservation` objects in predicate admission, which increases the chance of unexpected overcommitment on nodes. `Reservation` does not require any physical resources to be executable, so the overcommitment is mainly a problem only when pods get scheduled with a `Reservation` and start to run, which is somewhat easier to mitigate since the kubelet does admit these pods. To further decrease the possibility of unexpected overcommitment or pod admission failures, we could use resource estimation for in-flight pods, balance pods to the nodes with fewer reserved resources, etc.
+
+## Unsolved Problems
+
+As stated above, a `Reservation` can generate the same pod affinity/anti-affinity rules as its owner pods. The problem is resolved in the koord-scheduler by extending the scheduling framework, but it still limits the standard kube-scheduler.
+
+## Alternatives
+
+### Use a `pause` pod with a low priority to reserve resources
+
+Reserving resources with [`pause` pods with very low assigned priority](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#how-can-i-configure-overprovisioning-with-cluster-autoscaler) does work when preemption can be precisely enabled for specific pods. In the cluster autoscaler example, `pause` pods are helpful when we need to overprovision resources to prevent idle nodes from being scaled down by the CA. However, a `pause` pod has no reservation guarantee except `priority`. As stated above, many scenarios require reservations to rely on other pod characteristics (e.g. names, namespaces, labels, priorityClass), where `pause` pods cannot meet the demands.
+
+## References
+
+1. [Kueue Pod Resource Reservation](https://docs.google.com/document/d/1sbFUA_9qWtorJkcukNULr12FKX6lMvISiINxAURHNFo)
diff --git a/versioned_docs/version-v1.4/designs/runtime-proxy.md b/versioned_docs/version-v1.4/designs/runtime-proxy.md
new file mode 100644
index 000000000..47775c107
--- /dev/null
+++ b/versioned_docs/version-v1.4/designs/runtime-proxy.md
@@ -0,0 +1,153 @@
+# RuntimeProxy
+
+## Summary
+
+KoordRuntimeProxy acts as a proxy between kubelet and containerd (dockerd under the dockershim scenario). It is designed to
+intercept CRI requests and apply resource management policies, such as setting different cgroup parameters by pod
+priority under hybrid workload orchestration scenarios, or applying new isolation policies for the latest Linux kernels and
+CPU architectures.
+
+There are two components involved, KoordRuntimeProxy and RuntimePlugins.
+
+![image](/img/koord-runtime-proxy-architecture.svg)
+
+## Goals
+
+- Enhance resource management for QoS-based scheduling.
+- Provide an interface for new isolation features which are not supported by CRI.
+
+## Components
+
+### KoordRuntimeProxy
+
+KoordRuntimeProxy is in charge of intercepting requests during the pod's lifecycle, such as RunPodSandbox, CreateContainer etc.,
+and calling RuntimePlugins to apply resource isolation policies before transferring the request to the backend containerd (dockerd)
+and after transferring the response back to kubelet. KoordRuntimeProxy provides an isolation-policy-execution framework which allows
+customized plugins to register and apply their isolation policies; these plugins are called RuntimePlugins.
+KoordRuntimeProxy itself does NOT apply any isolation policies.
+
+### RuntimePlugins
+
+RuntimePlugins register events (RunPodSandbox etc.) with KoordRuntimeProxy and receive notifications when these events happen.
+RuntimePlugins should apply their resource isolation policies based on the notification message and then respond to
+KoordRuntimeProxy. KoordRuntimeProxy then decides whether to transfer the request to the backend containerd or discard it according to
+the plugins' response.
+
+If no RuntimePlugins are registered, KoordRuntimeProxy becomes a transparent proxy between kubelet and containerd.
+
+## Architecture
+
+![image](/img/koord-runtime-proxy-design.svg)
+
+There are 4 main components in KoordRuntimeProxy.
+
+### CRI Server
+
+As a proxy between kubelet and containerd, KoordRuntimeProxy acts as a CRI server for kubelet (an HTTP server under the dockershim
+scenario). It intercepts all requests from kubelet and generates the protocols for talking with plugins before and
+after talking with the backend containerd (dockerd).
+
+### Plugins Manager
+
+PluginsManager is in charge of dynamically parsing registered plugin info from `/etc/runtime/hookserver.d`.
+
+### Runtime Dispatcher
+
+RuntimeDispatcher is designed to manage communications with plugins.
+
+### Store
+
+As a proxy, KoordRuntimeProxy would ideally be stateless, but sometimes that does NOT work. Take the StartContainer hook
+for example: there is only a containerID in the CRI StartContainerRequest, which is not enough for plugins to adapt their policies,
+since plugins may not store pod/container info (such as meta, priority) locally. So KoordRuntimeProxy should store pod/container
+info during the RunPodSandbox/CreateContainer stages. When a StartContainer request comes, KoordRuntimeProxy can look up the pod/container info
+by containerID and then call the plugins with that info.
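+
+The Go sketch below illustrates this idea; the package, type, and method names are illustrative assumptions rather than the actual KoordRuntimeProxy implementation.
+
+```go
+// Illustrative sketch only: an in-memory cache of pod/container metadata keyed
+// by containerID, filled at RunPodSandbox/CreateContainer time and queried when
+// a StartContainer request arrives. Names and fields are assumptions.
+package store
+
+import "sync"
+
+// ContainerInfo holds the pod/container metadata that plugins need (meta, priority, ...).
+type ContainerInfo struct {
+	PodName      string
+	PodNamespace string
+	PodUID       string
+	Priority     string
+	Labels       map[string]string
+	Annotations  map[string]string
+}
+
+// Store is a concurrency-safe, memory-only cache; it never writes to disk.
+type Store struct {
+	mu    sync.RWMutex
+	items map[string]*ContainerInfo
+}
+
+func NewStore() *Store {
+	return &Store{items: map[string]*ContainerInfo{}}
+}
+
+// Save records container info while handling CreateContainer, before the
+// request is forwarded to the backend runtime.
+func (s *Store) Save(containerID string, info *ContainerInfo) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.items[containerID] = info
+}
+
+// Get returns the cached info while handling StartContainer, so plugins can be
+// called with full pod/container context instead of only a containerID.
+func (s *Store) Get(containerID string) (*ContainerInfo, bool) {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	info, ok := s.items[containerID]
+	return info, ok
+}
+```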
+ +With store, there would be pod/container info everytime KoordRuntimeProxy calls plugins, so there is no need for plugins to +store pod/container info exceptionally, plugins can be designed as stateless. + +Considering performance, store locates in memory and does not generate external io to disk. + +## Runtime Plugins + +### How to Register Plugins +All the plugin config files should be put to `/etc/runtime/hookserver.d` with `.json` suffix. You can register the plugin implemented by koordlet with RuntimeProxy: + +1. touch /etc/runtime/hookserver.d/koordlet.json +2. Copy the following content into /etc/runtime/hookserver.d/koordlet.json +``` +{ + "remote-endpoint": "/var/run/koordlet/koordlet.sock", + "failure-policy": "Ignore", + "runtime-hooks": [ + "PreRunPodSandbox", + "PreCreateContainer", + "PreStartContainer" + ] +} +``` + + +There are 3 fields involved: +- remote-endpoint: endpoint KoordRuntimeProxy talking with plugin, generated by plugin. +- failure-policy: policy when calling plugin fail, Fail or Ignore, default to Ignore. +- runtime-hooks: currently 7 hook points: + 1. PreRunPodSandbox + 2. PreCreateContainer + 3. PreStartContainer + 4. PostStartContainer + 5. PreUpdateContainerResources + 6. PostStopContainer + 7. PostStopPodSandbox + +hook points with prefix 'Pre' means calling plugins before transferring request to contianerd(dockerd). +hook points with prefix 'Post' means calling plugins after receiving response from containerd(dockerd). +plugin provider can set any hook combinations to "runtime-hooks". + +### Protocols between KoordRuntimeProxy and Plugins +[Protocols](https://github.com/koordinator-sh/koordinator/blob/main/apis/runtime/v1alpha1/api.proto) + +### Examples for Runtime Plugins +[koordlet-runtime-plugin-design](https://github.com/koordinator-sh/koordinator/blob/main/docs/design-archive/koordlet-runtime-hooks.md) + +## Installation + +### Installing from sources +get sources: `git clone https://github.com/koordinator-sh/koordinator.git` + +build: `cd koordinator; make build-koord-runtime-proxy` + +### Installing from packages +Download latest released package from: https://github.com/koordinator-sh/koordinator/releases + +### Setup Kubelet +Under containerd scenario, to make koord-runtime-proxy a proxy between kubelet and containerd, kubelet parameters should be altered as shown +below: +``` +kubelet --container-runtime=remote --container-runtime-endpoint=unix:///var/run/koord-runtimeproxy/runtimeproxy.sock +``` + +Under docker scenario, to make koord-runtime-proxy a proxy between kubelet and dockerd, kubelet parameters should be altered as shown +below: +``` +kubelet --docker-endpoint=unix:///var/run/koord-runtimeproxy/runtimeproxy.sock +``` + +### Setup KoordRuntimeProxy +Firstly, please make sure your runtime backend is containerd or dockerd. 
+
+In the containerd scenario, koord-runtime-proxy can be set up with the command:
+```
+koord-runtime-proxy --remote-runtime-service-endpoint=
+    --remote-image-service-endpoint=
+```
+If containerd listens for CRI requests on the default /var/run/koord-runtimeproxy/runtimeproxy.sock, koord-runtime-proxy can be set up with:
+```
+koord-runtime-proxy
+```
+
+In the docker scenario, koord-runtime-proxy should be set up with the additional parameter `--backend-runtime-mode Docker`
+and without `remote-image-service-endpoint`:
+```
+koord-runtime-proxy --backend-runtime-mode=Docker --remote-runtime-service-endpoint=
+```
diff --git a/versioned_docs/version-v1.4/installation.md b/versioned_docs/version-v1.4/installation.md
new file mode 100644
index 000000000..180f5181c
--- /dev/null
+++ b/versioned_docs/version-v1.4/installation.md
@@ -0,0 +1,170 @@
+# Installation
+
+Koordinator requires **Kubernetes version >= 1.18**.
+
+Koordinator needs to collect metrics from the kubelet read-only port (disabled by default).
+You can get more info from [here](https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/).
+
+For the best experience, Koordinator recommends **Linux kernel 4.19** or higher.
+
+
+## Install with helm
+
+Koordinator can be simply installed with helm v3.5+, a simple command-line tool that you can get from [here](https://github.com/helm/helm/releases).
+
+```bash
+# First, add the koordinator charts repository if you haven't done so.
+$ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/
+
+# [Optional]
+$ helm repo update
+
+# Install the latest version.
+$ helm install koordinator koordinator-sh/koordinator --version 1.4.0
+```
+
+## Upgrade with helm
+
+```bash
+# First, add the koordinator charts repository if you haven't done so.
+$ helm repo add koordinator-sh https://koordinator-sh.github.io/charts/
+
+# [Optional]
+$ helm repo update
+
+# Upgrade to the latest version.
+$ helm upgrade koordinator koordinator-sh/koordinator --version 1.4.0 [--force]
+```
+
+Note that:
+
+1. Before upgrading, you **must** first read the [Change Log](https://github.com/koordinator-sh/koordinator/blob/master/CHANGELOG.md)
+   to make sure that you understand the breaking changes in the new version.
+2. If you want to drop the chart parameters you configured for the old release or set some new parameters,
+   it is recommended to add the `--reset-values` flag to the `helm upgrade` command.
+   Otherwise you should use the `--reuse-values` flag to reuse the last release's values.
+
+## Optional: download charts manually
+
+If you have problems connecting to `https://koordinator-sh.github.io/charts/` in production, you might need to download the chart from [here](https://github.com/koordinator-sh/charts/releases) manually and install or upgrade with it.
+
+```bash
+$ helm install/upgrade koordinator /PATH/TO/CHART
+```
+
+## Optional: Enable NRI Mode Resource Management
+
+### Prerequisite
+
+- Containerd >= 1.7.0 with NRI enabled. Please make sure NRI is enabled in containerd; if not, please refer to [Enable NRI in Containerd](https://github.com/containerd/containerd/blob/main/docs/NRI.md).
+- Koordinator >= 1.4
+
+### Configurations
+
+NRI mode resource management is *Enabled* by default. You can use it without any modification of the koordlet config. You can also disable it by setting `enable-nri-runtime-hook=false` in the koordlet start args. If the prerequisites are not met, it does not matter; you can still use all other features as expected.
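+
+For example, with the default Helm installation (a koordlet DaemonSet named `koordlet` in the `koordinator-system` namespace; both names are assumptions that depend on your chart values), the flag can be appended by editing the DaemonSet, e.g. `kubectl -n koordinator-system edit daemonset koordlet`, roughly as sketched below:
+
+```yaml
+# Sketch of the koordlet container args after the edit; only the added flag is
+# relevant, all other args stay unchanged. The flag name comes from the note
+# above; the exact dash prefix may differ depending on the koordlet version.
+spec:
+  template:
+    spec:
+      containers:
+        - name: koordlet
+          args:
+            - -enable-nri-runtime-hook=false   # disable NRI mode resource management
+            # ... keep the existing args ...
+```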
+ +## Options + +Note that installing this chart directly means it will use the default template values for Koordinator. + +You may have to set your specific configurations if it is deployed into a production cluster, or you want to configure feature-gates. + +### Optional: chart parameters + +The following table lists the configurable parameters of the chart and their default values. + +| Parameter | Description | Default | +| ----------------------------------------- | ---------------------------------------------------------------- |---------------------------------| +| `featureGates` | Feature gates for Koordinator, empty string means all by default | ` ` | +| `installation.namespace` | namespace for Koordinator installation | `koordinator-system` | +| `installation.createNamespace` | Whether to create the installation.namespace | `true` | +| `imageRepositoryHost` | Image repository host | `ghcr.io` | +| `manager.log.level` | Log level that koord-manager printed | `4` | +| `manager.replicas` | Replicas of koord-manager deployment | `2` | +| `manager.image.repository` | Repository for koord-manager image | `koordinatorsh/koord-manager` | +| `manager.image.tag` | Tag for koord-manager image | `v1.4.0` | +| `manager.resources.limits.cpu` | CPU resource limit of koord-manager container | `1000m` | +| `manager.resources.limits.memory` | Memory resource limit of koord-manager container | `1Gi` | +| `manager.resources.requests.cpu` | CPU resource request of koord-manager container | `500m` | +| `manager.resources.requests.memory` | Memory resource request of koord-manager container | `256Mi` | +| `manager.metrics.port` | Port of metrics served | `8080` | +| `manager.webhook.port` | Port of webhook served | `9443` | +| `manager.nodeAffinity` | Node affinity policy for koord-manager pod | `{}` | +| `manager.nodeSelector` | Node labels for koord-manager pod | `{}` | +| `manager.tolerations` | Tolerations for koord-manager pod | `[]` | +| `manager.resyncPeriod` | Resync period of informer koord-manager, defaults no resync | `0` | +| `manager.hostNetwork` | Whether koord-manager pod should run with hostnetwork | `false` | +| `scheduler.log.level` | Log level that koord-scheduler printed | `4` | +| `scheduler.replicas` | Replicas of koord-scheduler deployment | `2` | +| `scheduler.image.repository` | Repository for koord-scheduler image | `koordinatorsh/koord-scheduler` | +| `scheduler.image.tag` | Tag for koord-scheduler image | `v1.4.0` | +| `scheduler.resources.limits.cpu` | CPU resource limit of koord-scheduler container | `1000m` | +| `scheduler.resources.limits.memory` | Memory resource limit of koord-scheduler container | `1Gi` | +| `scheduler.resources.requests.cpu` | CPU resource request of koord-scheduler container | `500m` | +| `scheduler.resources.requests.memory` | Memory resource request of koord-scheduler container | `256Mi` | +| `scheduler.port` | Port of metrics served | `10251` | +| `scheduler.nodeAffinity` | Node affinity policy for koord-scheduler pod | `{}` | +| `scheduler.nodeSelector` | Node labels for koord-scheduler pod | `{}` | +| `scheduler.tolerations` | Tolerations for koord-scheduler pod | `[]` | +| `scheduler.hostNetwork` | Whether koord-scheduler pod should run with hostnetwork | `false` | +| `koordlet.log.level` | Log level that koordlet printed | `4` | +| `koordlet.image.repository` | Repository for koordlet image | `koordinatorsh/koordlet` | +| `koordlet.image.tag` | Tag for koordlet image | `v1.4.0` | +| `koordlet.resources.limits.cpu` | CPU resource limit of 
koordlet container | `500m` | +| `koordlet.resources.limits.memory` | Memory resource limit of koordlet container | `256Mi` | +| `koordlet.resources.requests.cpu` | CPU resource request of koordlet container | `0` | +| `koordlet.resources.requests.memory` | Memory resource request of koordlet container | `0` | +| `koordlet.enableServiceMonitor` | Whether to enable ServiceMonitor for koordlet | `false` | +| `webhookConfiguration.failurePolicy.pods` | The failurePolicy for pods in mutating webhook configuration | `Ignore` | +| `webhookConfiguration.timeoutSeconds` | The timeoutSeconds for all webhook configuration | `30` | +| `crds.managed` | Koordinator will not install CRDs with chart if this is false | `true` | +| `imagePullSecrets` | The list of image pull secrets for koordinator image | `false` | + +Specify each parameter using the `--set key=value[,key=value]` argument to `helm install` or `helm upgrade`. + +### Optional: feature-gate + +Feature-gate controls some influential features in Koordinator: + +| Name | Description | Default | Effect (if closed) | +| ------------------------- | ---------------------------------------------------------------- | ------- | -------------------------------------- | +| `PodMutatingWebhook` | Whether to open a mutating webhook for Pod **create** | `true` | Don't inject koordinator.sh/qosClass, koordinator.sh/priority and don't replace koordinator extend resources ad so on | +| `PodValidatingWebhook` | Whether to open a validating webhook for Pod **create/update** | `true` | It is possible to create some Pods that do not conform to the Koordinator specification, causing some unpredictable problems | + + +If you want to configure the feature-gate, just set the parameter when install or upgrade. Such as: + +```bash +$ helm install koordinator https://... --set featureGates="PodMutatingWebhook=true\,PodValidatingWebhook=true" +``` + +If you want to enable all feature-gates, set the parameter as `featureGates=AllAlpha=true`. + +### Optional: the local image for China + +If you are in China and have problem to pull image from official DockerHub, you can use the registry hosted on Alibaba Cloud: + +```bash +$ helm install koordinator https://... --set imageRepositoryHost=registry.cn-beijing.aliyuncs.com +``` + +## Best Practices + +### Installation parameters for AWS EKS + +When using a custom CNI (such as Weave or Calico) on EKS, the webhook cannot be reached by default. This happens because the control plane cannot be configured to run on a custom CNI on EKS, so the CNIs differ between control plane and worker nodes. + +To address this, the webhook can be run in the host network so it can be reached, by setting `--set manager.hostNetwork=true` when use helm install or upgrade. + +## Uninstall + +Note that this will lead to all resources created by Koordinator, including webhook configurations, services, namespace, CRDs and CR instances managed by Koordinator controller, to be deleted! + +Please do this ONLY when you fully understand the consequence. + +To uninstall koordinator if it is installed with helm charts: + +```bash +$ helm uninstall koordinator +release "koordinator" uninstalled +``` diff --git a/versioned_docs/version-v1.4/introduction.md b/versioned_docs/version-v1.4/introduction.md new file mode 100644 index 000000000..2d1bc9b8c --- /dev/null +++ b/versioned_docs/version-v1.4/introduction.md @@ -0,0 +1,48 @@ +--- +title: Introduction +slug: / +--- + +# Introduction + +Welcome to Koordinator! 
+
+## Overview
+
+Koordinator is a QoS-based scheduling system for efficient orchestration of microservices, AI, and big data workloads on Kubernetes. It aims to improve the runtime efficiency and reliability of both latency-sensitive workloads and batch jobs, simplify the complexity of resource-related configuration tuning, and increase pod deployment density to improve resource utilization.
+
+
+## Key Features
+
+Koordinator enhances the Kubernetes user experience in workload management by providing the following:
+
+- A well-designed priority and QoS mechanism to co-locate different types of workloads in a cluster and run different types of pods on a single node.
+- Resource overcommitment to achieve high resource utilization while still satisfying QoS guarantees, by leveraging an application profiling mechanism.
+- A fine-grained resource orchestration and isolation mechanism to improve the efficiency of latency-sensitive workloads and batch jobs.
+- A flexible job scheduling mechanism to support workloads in specific areas, e.g., big data, AI, audio and video.
+- A set of tools for monitoring, troubleshooting and operations.
+
+
+## Koordinator vs. Other Concepts
+
+### Koordinator QoS vs Kubernetes QoS
+
+Kubernetes provides three types of QoS: Guaranteed/Burstable/BestEffort, of which Guaranteed/Burstable is widely used and BestEffort is rarely used. Koordinator is compatible with Kubernetes QoS and has numerous enhancements on each type. In order to avoid interfering with the native QoS semantics, Koordinator introduces an independent field ```koordinator.sh/qosClass``` to describe the co-location QoS. This QoS describes the service quality of the Pod running on the node in the co-location scenario. It is the most critical semantics of the co-location system.
+
+### Koordinator scheduler vs kube-scheduler
+
+The Koordinator scheduler is **not** designed to replace kube-scheduler, but to make co-located workloads run **better** on Kubernetes.
+
+The Koordinator scheduler is developed based on the scheduling framework, adding scheduling plugins related to co-location and priority preemption on top of the native scheduling capabilities. Koordinator is committed to promoting related enhancements into the upstream Kubernetes community and to the standardization of co-location technology.
+
+
+## What's Next
+
+Here are some recommended next steps:
+
+- Start to [install Koordinator](./installation).
+- Learn Koordinator's [Overview](architecture/overview).
+
+
diff --git a/versioned_docs/version-v1.4/user-manuals/capacity-scheduling.md b/versioned_docs/version-v1.4/user-manuals/capacity-scheduling.md
new file mode 100644
index 000000000..3b94fc75d
--- /dev/null
+++ b/versioned_docs/version-v1.4/user-manuals/capacity-scheduling.md
@@ -0,0 +1,645 @@
+# Capacity Scheduling - Elastic Quota Management
+
+Capacity Scheduling is an ability of the koord-scheduler to manage different users' resource usage in a shared cluster.
+
+## Introduction
+When several users or teams share a cluster, fairness of resource allocation is very important. Koordinator provides a
+multi-hierarchy elastic quota management mechanism for the scheduler.
+- It supports configuring quota groups in a tree structure, which is similar to the organizational structure of most companies.
+- It supports the borrowing / returning of resources between different quota groups, for better resource utilization efficiency.
+The busy quota groups can automatically temporarily borrow the resources from the idle quota groups, which can improve the +utilization of the cluster. At the same time, when the idle quota group turn into the busy quota group, it can also automatically +take back the "lent-to" resources. +- It considers the resource fairness between different quota groups. When the busy quota groups borrow the +resources from the idle quota groups, the resources can be allocated to the busy quota groups under some fair rules. + +## Setup + +### Prerequisite + +- Kubernetes >= 1.18 +- Koordinator >= 0.71 + +### Installation + +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to [Installation](/docs/installation). + +### Configurations + +Capacity-Scheduling is *Enabled* by default. You can use it without any modification on the koord-descheduler config. + +## Use Capacity-Scheduling + +### Quick Start by Label + +1.Create a Deployment `quota-example` with the YAML file below. + +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-example + namespace: default + labels: + quota.scheduling.koordinator.sh/parent: "" + quota.scheduling.koordinator.sh/is-parent: "false" +spec: + max: + cpu: 40 + memory: 40Gi + min: + cpu: 10 + memory: 20Mi +``` + +```bash +$ kubectl apply -f quota-example.yaml + elasticquota.scheduling.sigs.k8s.io/quota-example created + +$ kubectl get eqs -n default + NAME AGE + test-d 2s +``` + +2.Create a pod `pod-example` with the YAML file below. +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example + namespace: default + labels: + quota.scheduling.koordinator.sh/name: "quota-example" +spec: + schedulerName: koord-scheduler + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: 40m + memory: 40Mi + requests: + cpu: 40m + memory: 40Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always +``` + +```bash +$ kubectl apply -f pod-example.yaml + pod/pod-example created +``` + +3.Verify `quota-example` has changed. +```bash +$ kubectl get eqs -n default quota-example -o yaml +``` +```yaml +kind: ElasticQuota +metadata: + annotations: + quota.scheduling.koordinator.sh/request: '{"cpu":"40m","memory":"40Mi"}' + quota.scheduling.koordinator.sh/runtime: '{"cpu":"40m","memory":"40Mi"}' + quota.scheduling.koordinator.sh/shared-weight: '{"cpu":"40","memory":"40Gi"}' + creationTimestamp: "2022-10-08T09:26:38Z" + generation: 2 + labels: + quota.scheduling.koordinator.sh/is-parent: "false" + quota.scheduling.koordinator.sh/parent: root + manager: koord-scheduler + operation: Update + time: "2022-10-08T09:26:50Z" + name: quota-example + namespace: default + resourceVersion: "39012008" +spec: + max: + cpu: "40" + memory: 40Gi + min: + cpu: "10" + memory: 20Mi +status: + used: + cpu: 40m + memory: 40Mi +``` + +### Quick Start by Namespace +1.Create namespace +```bash +$ kubectl create ns quota-example + namespace/quota-example created +``` + +2.Create a Deployment `quota-example` with the YAML file below. 
+ +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-example + namespace: quota-example + labels: + quota.scheduling.koordinator.sh/parent: "" + quota.scheduling.koordinator.sh/is-parent: "false" +spec: + max: + cpu: 40 + memory: 40Gi + min: + cpu: 10 + memory: 20Mi +``` + +```bash +$ kubectl apply -f quota-example.yaml + elasticquota.scheduling.sigs.k8s.io/quota-example created + +$ kubectl get eqs -n quota-example + NAME AGE + test-d 2s +``` + +2.Create a pod `pod-example` with the YAML file below. +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example + namespace: quota-example +spec: + schedulerName: koord-scheduler + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: 40m + memory: 40Mi + requests: + cpu: 40m + memory: 40Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always +``` + +```bash +$ kubectl apply -f pod-example.yaml + pod/pod-example created +``` + +3.Verify `quota-example` has changed. +```bash +$ kubectl get eqs -n quota-example quota-example -o yaml +``` +```yaml +kind: ElasticQuota +metadata: + annotations: + quota.scheduling.koordinator.sh/request: '{"cpu":"40m","memory":"40Mi"}' + quota.scheduling.koordinator.sh/runtime: '{"cpu":"40m","memory":"40Mi"}' + quota.scheduling.koordinator.sh/shared-weight: '{"cpu":"40","memory":"40Gi"}' + creationTimestamp: "2022-10-08T09:26:38Z" + generation: 2 + labels: + quota.scheduling.koordinator.sh/is-parent: "false" + quota.scheduling.koordinator.sh/parent: root + manager: koord-scheduler + operation: Update + time: "2022-10-08T09:26:50Z" + name: quota-example + namespace: quota-example + resourceVersion: "39012008" +spec: + max: + cpu: "40" + memory: 40Gi + min: + cpu: "10" + memory: 20Mi +status: + used: + cpu: 40m + memory: 40Mi +``` + +### Quota Debug Api. +```bash +$ kubectl -n koordinator-system get lease koord-scheduler --no-headers | awk '{print $2}' | cut -d'_' -f1 | xargs -I {} kubectl -n koordinator-system get pod {} -o wide --no-headers | awk '{print $6}' + 10.244.0.64 + +$ curl 10.244.0.64:10251/apis/v1/plugins/ElasticQuota/quota/quota-example +``` + +```json +{ + "allowLentResource": true, + "autoScaleMin": { + "cpu": "10", + "memory": "20Mi", + }, + "isParent": false, + "max": { + "cpu": "40", + "memory": "40Gi", + }, + "min": { + "cpu": "10", + "memory": "20Mi", + }, + "name": "quota-example", + "parentName": "root", + "podCache": { + "pod-example": { + "isAssigned": true, + "resource": { + "cpu": "40m", + "memory": "40Mi" + } + } + }, + "request": { + "cpu": "40m", + "memory": "40Mi" + }, + "runtime": { + "cpu": "40m", + "memory": "41943040", + }, + "runtimeVersion": 39, + "sharedWeight": { + "cpu": "40", + "memory": "40Gi", + }, + "used": { + "cpu": "40m", + "memory": "40Mi" + } +} +``` +The main different with yaml is that we can find all quota's pods and its status in `podCache`. 
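+
+If `jq` is available, the same debug endpoint can be filtered for a quick view of the granted runtime quota, the current usage, and the pods tracked in `podCache` (the scheduler pod IP is the one obtained in the previous step):
+
+```bash
+$ curl -s 10.244.0.64:10251/apis/v1/plugins/ElasticQuota/quota/quota-example | jq '{runtime, used, pods: (.podCache | keys)}'
+```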
+ +### Advanced Configurations +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-example + namespace: default + labels: + quota.scheduling.koordinator.sh/is-parent: false + quota.scheduling.koordinator.sh/parent: "parent" + quota.scheduling.koordinator.sh/allow-lent-resource: true + quota.scheduling.koordinator.sh/shared-weight: '{"cpu":"40","memory":"40Gi"}' +spec: + max: + cpu: 40 + memory: 40Gi + min: + cpu: 10 + memory: 20Mi +``` + +- `quota.scheduling.koordinator.sh/is-parent` is disposed by the user. It reflects the "child\parent" attribute of the quota group. Default is child. +- `quota.scheduling.koordinator.sh/parent` is disposed by the user. It reflects the parent quota name. Default is root. +- `quota.scheduling.koordinator.sh/shared-weight` is disposed by the user. It reflects the ability to share the "lent to" resource. Default equals to "max". +- `quota.scheduling.koordinator.sh/allow-lent-resource` is disposed by the user. It reflects whether quota group allows lent unused "min" to others. + +### WebHook Verify +1.Except for the first level quota group, we require that the sum of "min" of all sub quota groups should be less than or +equal to the "min" of parent group. + +first create parent quota: +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-parent-example + namespace: default + labels: + quota.scheduling.koordinator.sh/is-parent: true +spec: + max: + cpu: 40 + memory: 40Gi + min: + cpu: 10 + memory: 20Mi +``` + +then create child quota: +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-example + namespace: default + labels: + quota.scheduling.koordinator.sh/is-parent: false + quota.scheduling.koordinator.sh/parent: "quota-parent-example" +spec: + max: + cpu: 40 + memory: 40Gi + min: + cpu: 20 + memory: 20Mi +``` + +```bash +kubectl apply -f quota-example.yaml +Error from server: error when creating "quota-example.yaml": admission webhook "vquota.kb.io" denied the request: checkMinQuotaSum allChildren SumMinQuota > parentMinQuota, parent: quota-parent-example +``` + +2.Parent and child's min\max resource key must same. +first create parent quota: +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-parent-example + namespace: default + labels: + quota.scheduling.koordinator.sh/is-parent: true +spec: + max: + cpu: 40 + memory: 40Gi + min: + cpu: 10 + memory: 20Mi +``` + +then create child quota: +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-example + namespace: default + labels: + quota.scheduling.koordinator.sh/is-parent: false + quota.scheduling.koordinator.sh/parent: "quota-parent-example" +spec: + max: + cpu: 40 + memory: 40Gi + test: 200 + min: + cpu: 10 + memory: 20Mi +``` + +```bash +$ kubectl apply -f quota-example.yaml + Error from server: error when creating "quota-example.yaml": admission webhook "vquota.kb.io" denied the request: checkSubAndParentGroupMaxQuotaKeySame failed: quota-parent-example's key is not the same with quota-example +``` + +3.Parent group cannot run pod. 
+ +first create parent quota: +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-parent-example + namespace: default + labels: + quota.scheduling.koordinator.sh/is-parent: true +spec: + max: + cpu: 40 + memory: 40Gi + min: + cpu: 10 + memory: 20Mi +``` + +then create pod: +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example + namespace: default + labels: + quota.scheduling.koordinator.sh/name: "quota-parent-example" +spec: + schedulerName: koord-scheduler + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: 40m + memory: 40Mi + requests: + cpu: 40m + memory: 40Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always +``` + +```bash +$ kubectl apply -f pod-example_xb.yaml + Error from server: error when creating "pod-example.yaml": admission webhook "vpod.kb.io" denied the request: pod can not be linked to a parentQuotaGroup,quota:quota-parent-example, pod:pod-example +``` + +4.The parent of node can only be parent group, not child group. + +first create parent quota: +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-parent-example + namespace: default + labels: + quota.scheduling.koordinator.sh/is-parent: false +spec: + max: + cpu: 40 + memory: 40Gi + min: + cpu: 10 + memory: 20Mi +``` + +then create child quota: +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-example + namespace: default + labels: + quota.scheduling.koordinator.sh/is-parent: false + quota.scheduling.koordinator.sh/parent: "quota-parent-example" +spec: + max: + cpu: 40 + memory: 40Gi + test: 200 + min: + cpu: 10 + memory: 20Mi +``` + +```bash +$ kubectl apply -f quota-example.yaml + Error from server: error when creating "elastic-quota-example_xb.yaml": admission webhook "vquota.kb.io" denied the request: quota-example has parentName quota-parent-example but the parentQuotaInfo's IsParent is false +``` + +5.A quota group can't be converted on the attribute of parent group\child group. + +first create parent quota: +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: ElasticQuota +metadata: + name: quota-parent-example + namespace: default + labels: + quota.scheduling.koordinator.sh/is-parent: true +spec: + max: + cpu: 40 + memory: 40Gi + min: + cpu: 10 + memory: 20Mi +``` + +then modify `quota.scheduling.koordinator.sh/is-parent:false`: +```bash +$ kubectl apply -f quota-parent-example.yaml + elastic-quota-example_xb_parent.yaml": admission webhook "vquota.kb.io" denied the request: IsParent is forbidden modify now, quotaName:quota-parent-example +``` + +### used > runtime revoke +We offer a config to control if quota's used > runtime, we allow the scheduler to delete over-resource-used pod from +low priority to high priority. you should follow the below config of `koord-scheduler-config.yaml` in helm. 
+ +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: koord-scheduler-config + namespace: {{ .Values.installation.namespace }} +data: + koord-scheduler-config: | + apiVersion: kubescheduler.config.k8s.io/v1beta2 + kind: KubeSchedulerConfiguration + leaderElection: + leaderElect: true + resourceLock: leases + resourceName: koord-scheduler + resourceNamespace: {{ .Values.installation.namespace }} + profiles: + - pluginConfig: + - name: ElasticQuota + args: + apiVersion: kubescheduler.config.k8s.io/v1beta2 + kind: ElasticQuotaArgs + quotaGroupNamespace: {{ .Values.installation.namespace }} + enableCheckParentQuota: true + monitorAllQuotas: true + revokePodInterval: 60s + delayEvictTime: 300s + plugins: + queueSort: + disabled: + - name: "*" + enabled: + - name: Coscheduling + preFilter: + enabled: + - name: NodeNUMAResource + - name: DeviceShare + - name: Reservation + - name: Coscheduling + - name: ElasticQuota + filter: + ... +``` +- `enableCheckParentQuota` check parentQuotaGroups' used and runtime Quota. Default is false. +- `monitorAllQuotas` enable "used > runtime revoke" logic. Default is false. +- `revokePodInterval` check loop time interval. +- `delayEvictTime` when "used > runtime" continues over `delayEvictTime` will really trigger eviction. + +To let scheduler can really delete the pod successfully, you should config the `rbac/koord-scheduler.yaml` as below in helm. + +```yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: koord-scheduler-role +rules: +{{- if semverCompare "<= 1.20-0" .Capabilities.KubeVersion.Version }} +- apiGroups: + - "" + resources: + - namespaces + verbs: + - get + - list + - watch +{{- end }} +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - update +- apiGroups: + - "" + resources: + - pods + verbs: + - patch + - update + - delete +- apiGroups: + - "" + resources: + - pods/eviction + verbs: + - create +- apiGroups: + ... +``` + +To prevent Pods from being revoked, you can add label `quota.scheduling.koordinator.sh/preemptible: false` to the Pod: +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example + namespace: default + labels: + quota.scheduling.koordinator.sh/name: "quota-example" + quota.scheduling.koordinator.sh/preemptible: false +spec: +... +``` +In this case, the Pod is not allowed to use resources exceeding the `Min`. +Since the "Min" resources are the guaranteed resources, the Pod will not be evicted. \ No newline at end of file diff --git a/versioned_docs/version-v1.4/user-manuals/colocation-profile.md b/versioned_docs/version-v1.4/user-manuals/colocation-profile.md new file mode 100644 index 000000000..3d3c3c540 --- /dev/null +++ b/versioned_docs/version-v1.4/user-manuals/colocation-profile.md @@ -0,0 +1,137 @@ +--- +sidebar_position: 1 +--- + +# Colocation Profile + +## Motivation + +If the workloads in the existing cluster want to be co-located through Koordinator, you need to modify the existing Controller/Operator to support protocols such as the QoS class, priority, and resource model defined by Koordinator. +In order to avoid repeated construction and make it easier for everyone to obtain the benefits of co-location technology, Koordinator defines `ClusterColocationProfile` CRD, and implements webhook modify and verify newly created Pods, inject the fields described in `ClusterColocationProfile`. 
+ + +## Architecture + +![image](/img/clustercolocationprofile-arch.png) + +## feature-gates + +ClusterColocationProfile mutating/validating feature is turned on by default, if you want to turn it off set feature-gates: + +```bash +$ helm install koordinator https://... --set featureGates="PodMutatingWebhook=false\,PodValidatingWebhook=false" +``` + + +## Spec definition + +If you are not familiar with Kubernetes resources please refer to the page [Understanding Kubernetes Objects](https://kubernetes.io/docs/concepts/overview/working-with-objects/kubernetes-objects/). + +- **namespaceSelector**: decides whether to mutate/validate Pods if the namespace matches the selector. Default to the empty LabelSelector, which will match everything. + +- **selector**: decides whether to mutate/validate Pods if the Pod matches the selector. Default to the empty LabelSelector, which will match everything. + +- **qosClass** (*required*): describes the type of Koordinator QoS that the Pod is running. The value will be injected into Pod as label koordinator.sh/qosClass. Options are `LSE`, `LSR`, `LS`, `BE`, and `SYSTEM`. For more information, please check [here](../architecture/qos). + +- **priorityClassName** (*required*): the priorityClassName and the priority value defined in PriorityClass will be injected into the Pod. Options are `koord-prod`, `koord-mid`, `koord-batch`, and `koord-free`. For more information, please check [here](../architecture/priority). + +- **koordinatorPriority**: defines the Pod sub-priority in Koordinator. The priority value will be injected into Pod as label koordinator.sh/priority. Various Koordinator components determine the priority of the Pod in the Koordinator through KoordinatorPriority and the priority value in PriorityClassName. Higher the value, higher the priority. + +- **labels**: describes the k/v pair that needs to inject into `Pod.Labels`. + +- **annotations**: describes the k/v pair that needs to inject into `Pod.Annotations`. + +- **schedulerName**: if specified, the pod will be dispatched by specified scheduler. + +- **patch**: indicates Pod Template patching that user would like to inject into the Pod. + + +## Example + +### Create ClusterColocationProfile + +The `profile.yaml` file below describes to modify Pod in Namepspace with label `koordinator.sh/enable-colocation=true` and inject Koordinator QoS, Koordinator Priority etc. + +```yaml +apiVersion: config.koordinator.sh/v1alpha1 +kind: ClusterColocationProfile +metadata: + name: colocation-profile-example +spec: + namespaceSelector: + matchLabels: + koordinator.sh/enable-colocation: "true" + selector: + matchLabels: + koordinator.sh/enable-colocation: "true" + qosClass: BE + priorityClassName: koord-batch + koordinatorPriority: 1000 + schedulerName: koord-scheduler + labels: + koordinator.sh/mutated: "true" + annotations: + koordinator.sh/intercepted: "true" + patch: + spec: + terminationGracePeriodSeconds: 30 +``` + +Create a ClusterColocationProfile based on the YAML file: + +```bash +$ kubectl apply -f profile.yaml +``` + +### Verify ClusterColocationProfile works + +```yaml +apiVersion: v1 +kind: Pod +metadata: + labels: + koordinator.sh/enable-colocation: "true" + name: test-pod +spec: + containers: + - name: app + image: nginx:1.15.1 + resources: + limits: + cpu: "1" + memory: "3456Mi" + requests: + cpu: "1" + memory: "3456Mi" +``` + +Create this pod and now you will find it's injected with Koordinator QoS, Koordinator Priority etc. 
+ +```bash +$ kubectl get pod test-pod -o yaml +apiVersion: v1 +kind: Pod +metadata: + annotations: + koordinator.sh/intercepted: true + labels: + koordinator.sh/qosClass: BE + koordinator.sh/priority: 1000 + koordinator.sh/mutated: true + ... +spec: + terminationGracePeriodSeconds: 30 + priority: 5000 + priorityClassName: koord-batch + schedulerName: koord-scheduler + containers: + - name: app + image: nginx:1.15.1 + resources: + limits: + kubernetes.io/batch-cpu: "1000" + kubernetes.io/batch-memory: 3456Mi + requests: + kubernetes.io/batch-cpu: "1000" + kubernetes.io/batch-memory: 3456Mi +``` diff --git a/versioned_docs/version-v1.4/user-manuals/cpu-burst.md b/versioned_docs/version-v1.4/user-manuals/cpu-burst.md new file mode 100644 index 000000000..315ab8661 --- /dev/null +++ b/versioned_docs/version-v1.4/user-manuals/cpu-burst.md @@ -0,0 +1,197 @@ +# CPU Burst + +## Introduction + +CPU Burst is a service level objective (SLO)-aware resource scheduling feature provided by Koordinator. You can use CPU Burst to improve the performance of latency-sensitive applications. CPU scheduling for a container may be throttled by the kernel due to the CPU limit, which downgrades the performance of the application. The koordlet component automatically detects CPU throttling events and automatically adjusts the CPU limit to a proper value. This greatly improves the performance of latency-sensitive applications. + +### How CPU Burst works + +Kubernetes allows you to specify CPU limits, which can be reused based on time-sharing. If you specify a CPU limit for a container, the OS limits the amount of CPU resources that can be used by the container within a specific time period. For example, you set the CPU limit of a container to 2. The OS kernel limits the CPU time slices that the container can use to 200 milliseconds within each 100-millisecond period. + +CPU utilization is a key metric that is used to evaluate the performance of a container. In most cases, the CPU limit is specified based on CPU utilization. CPU utilization on a per-millisecond basis shows more spikes than on a per-second basis. If the CPU utilization of a container reaches the limit within a 100-millisecond period, CPU throttling is enforced by the OS kernel and threads in the container are suspended for the rest of the time period, as shown in the following figure. + +![image](/img/cpu-throttles.png) + +The following figure shows the thread allocation of a web application container that runs on a node with four vCPUs. The CPU limit of the container is set to 2. The overall CPU utilization within the last second is low. However, Thread 2 cannot be resumed until the third 100-millisecond period starts because CPU throttling is enforced somewhere in the second 100-millisecond period. This increases the response time (RT) and causes long-tail latency problems in containers. + +![image](/img/cpu-throttles-1.png) + +Upstream Linux kernel >=5.14 and Anolis OS both provide [Burstable CFS Controller](https://github.com/torvalds/linux/commit/f4183717b370ad28dd0c0d74760142b20e6e7931#diff-cc1a82129952a910fdc4292448c2a097a2ba538bebefcf3c06381e45639ae73e), namely *CPU Burst* feature. It allows a container to accumulate CPU time slices when the container is idle. The container can use the accumulated CPU time slices to burst above the CPU limit when CPU utilization spikes. This improves performance and reduces the RT of the container. 
+ +![image](/img/cpu-throttles-2.png) + +For kernel versions that do not support CPU Burst, koordlet detects CPU throttling events and dynamically adjusts the CPU limit to achieve the same effect as CPU Burst. + +For more information about CPU Burst, see the presentation at KubeCon 2021: [CPU Burst: Getting Rid of Unnecessary Throttling, Achieving High CPU Utilization and Application Performance at the Same Time](https://kccncosschn21.sched.com/event/pcdF?spm=a2c63.p38356.0.0.2ec3731dhQbCIe&iframe=no). + +## Setup + +### Prerequisite + +- Kubernetes >= 1.18 +- Koordinator >= 0.3 + +### Installation + +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to +[Installation](/docs/installation). + +### Configurations + +Koordlet has already enabled CPU Burst feature (`-feature-gates=AllAlpha=true`). If not, please enable it manually by updating the feature gate in the koordlet daemonset. + +NOTE: CPU Burst is not available for `LSR` and `BE` pods since it targets on burstable cpu usages. + +```yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: koordlet +spec: + selector: + matchLabels: + koord-app: koordlet + template: + metadata: + labels: + koord-app: koordlet + spec: + containers: + - command: + - /koordlet + args: + - -CgroupRootDir=/host-cgroup/ + - -feature-gates=XXXX,CPUBurst=true # enable CPU Burst feature + ... +``` + +## Use CPU Burst + +### Use an annotation to enable CPU Burst for the pod + +Add the following annotation to the pod configuration to enable CPU Burst: + +```yaml +apiVersion: apps/v1 +kind: Pod +metadata: + name: demo-pod-xxx + annotations: + # Set the value to auto to enable CPU Burst for the pod. + koordinator.sh/cpuBurst: '{"policy": "auto"}' + # To disable CPU Burst for the pod, set the value to none. + #koordinator.sh/cpuBurst: '{"policy": "none"}' +``` + +### Use a ConfigMap to enable CPU Burst for all pods in a cluster + +Modify the slo-controller-config ConfigMap based on the following content to enable CPU Burst for all pods in a cluster: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: koordinator-system +data: + cpu-burst-config: '{"clusterStrategy": {"policy": "auto"}}' + #cpu-burst-config: '{"clusterStrategy": {"policy": "cpuBurstOnly"}}' + #cpu-burst-config: '{"clusterStrategy": {"policy": "none"}}' +``` + +### (Optional) Advanced Settings + +The following code block shows the pod annotations and ConfigMap fields that you can use for advanced configurations: + +```yaml +# Example of the slo-controller-config ConfigMap. +data: + cpu-burst-config: | + { + "clusterStrategy": { + "policy": "auto", + "cpuBurstPercent": 1000, + "cfsQuotaBurstPercent": 300, + "sharePoolThresholdPercent": 50, + "cfsQuotaBurstPeriodSeconds": -1 + } + } + + # Example of pod annotations. + koordinator.sh/cpuBurst: '{"policy": "auto", "cpuBurstPercent": 1000, "cfsQuotaBurstPercent": 300, "cfsQuotaBurstPeriodSeconds": -1}' +``` + +The following table describes the ConfigMap fields that you can use for advanced configurations of CPU Burst. 
+
+| Field                      | Data type | Description |
+| -------------------------- | --------- | ----------- |
+| policy                     | string    | The CPU Burst policy. Valid values: `none` (default) disables CPU Burst; `cpuBurstOnly` enables only the CPU Burst capability of the kernel; `cfsQuotaBurstOnly` enables only the automatic adjustment of the cfs_quota cgroup parameter; `auto` enables both, as shown in the ConfigMap examples above. |
| +| cpuBurstPercent | int | Default value:`1000`. Unit: %. This field specifies the percentage to which the CPU limit can be increased by CPU Burst. If the CPU limit is set to `1`, CPU Burst can increase the limit to 10 by default. | +| cfsQuotaBurstPercent | int | Default value:`300`. Unit: %. This field specifies the maximum percentage to which the value of cfs_quota in the cgroup parameters can be increased. By default, the value of cfs_quota can be increased to at most three times. | +| cfsQuotaBurstPeriodSeconds | int | Default value:`-1`. Unit: seconds. This indicates that the time period in which the container can run with an increased CFS quota is unlimited. This field specifies the time period in which the container can run with an increased CFS quota, which cannot exceed the upper limit specified by `cfsQuotaBurstPercent`. | +| sharePoolThresholdPercent | int | Default value:`50`. Unit: %. This field specifies the CPU utilization threshold of the node. If the CPU utilization of the node exceeds the threshold, the value of cfs_quota in cgroup parameters is reset to the original value. | + +### Verify CPU Burst + +1. Use the following YAML template to create an apache-demo.yaml file. + +> To enable CPU Burst for a pod, specify an annotation in the annotations parameter of the metadata section of the pod configuration. + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: apache-demo + annotations: + koordinator.sh/cpuBurst: '{"policy": "auto"}' # Use this annotation to enable or disable CPU Burst. +spec: + containers: + - command: + - httpd + - -D + - FOREGROUND + image: koordinatorsh/apache-2-4-51-for-slo-test:v0.1 + imagePullPolicy: Always + name: apache + resources: + limits: + cpu: "4" + memory: 10Gi + requests: + cpu: "4" + memory: 10Gi + nodeName: # $nodeName Set the value to the name of the node that you use. + hostNetwork: False + restartPolicy: Never + schedulerName: default-scheduler +``` + +2. Run the following command to create an application by using Apache HTTP Server. + +```bash +kubectl apply -f apache-demo.yaml +``` + +3. Use the wrk2 tool to perform stress tests. + +```bash +# Download, decompress, and then install the wrk2 package. +# The Gzip module is enabled in the configuration of the Apache application. The Gzip module is used to simulate the logic of processing requests on the server. +# Run the following command to send requests. Replace the IP address in the command with the IP address of the application. +./wrk -H "Accept-Encoding: deflate, gzip" -t 2 -c 12 -d 120 --latency --timeout 2s -R 24 http://$target_ip_address:8010/static/file.1m.test +``` + +4. Check the results of CPU Burst enabled and disabled. + +e.g. We may have the following results: + +| CentOS 7 | Disabled | Enabled | +| ----------------------------- | ----------- | ------------------- | +| apache RT-p99 | 111.69 ms | 71.30 ms (-36.2%) | +| CPU Throttled Ratio | 33% | 0% | +| Average pod CPU utilization | 32.5% | 33.8% | + +The preceding metrics indicate the following information: + +- After CPU Burst is enabled, the P99 latency of apache is greatly reduced. +- After CPU Burst is enabled, CPU throttling is stopped and the average pod CPU utilization remains approximately at the same value. 
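+
+If you want to double-check on the node that the burst configuration has actually been applied, you can inspect the CFS interface files of the pod's CPU cgroup directly. The following is a minimal sketch, assuming cgroup v1 and a kernel that supports the Burstable CFS Controller; the pod cgroup path is illustrative and should be replaced with the real path on your node.
+
+```bash
+# The pod slice name below is illustrative.
+POD_CGROUP=/sys/fs/cgroup/cpu/kubepods.slice/kubepods-pod1c20f2ad****.slice
+
+# cfs_quota_us / cfs_period_us is the regular CPU limit, e.g. 400000 / 100000 = 4 CPUs.
+cat $POD_CGROUP/cpu.cfs_period_us
+cat $POD_CGROUP/cpu.cfs_quota_us
+
+# On kernels with the Burstable CFS Controller, koordlet sets the burst budget here;
+# a non-zero value indicates that CPU Burst is in effect for this pod.
+cat $POD_CGROUP/cpu.cfs_burst_us
+
+# nr_throttled / throttled_time should stop growing once CPU Burst takes effect.
+grep -E 'nr_throttled|throttled_time' $POD_CGROUP/cpu.stat
+```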
diff --git a/versioned_docs/version-v1.4/user-manuals/cpu-evict.md b/versioned_docs/version-v1.4/user-manuals/cpu-evict.md new file mode 100644 index 000000000..f8f8e03dc --- /dev/null +++ b/versioned_docs/version-v1.4/user-manuals/cpu-evict.md @@ -0,0 +1,144 @@ +# Eviction Strategy base on CPU Satisfaction + +## Introduction +Koordinator supports [CPU Suppress](/docs/user-manuals/cpu-suppress) strategy, which is used for limiting the available +CPU Cores of low-priority Pods (BE) according to the usage of high-priority Pods (LS) under during co-location. When the +resource usage of LS Pods increases, `Koordlet` will reduce the CPU cores that can be used by BE Pods. However, when the +LS Pod utilization increases suddenly, large number of BE Pods could be suppressed on small number of CPUs, resulting in +the low resource satisfaction of BE pods, moreover, there might be some additional competition on kernel resources. + +In fact, most BE pods are batch computing type, which have well failover abilities, and the eviction is acceptable for +them since they can retry with better resource quality on other nodes. `Koordlet` provides an eviction strategy based +on CPU resource satisfaction. When the utilization and resource satisfaction exceed the threshold at the same time, +pods with lower priority and higher CPU utilization will be evicted first until the CPU satisfaction has returned +above the threshold. + +![image](/img/cpu-evict.svg) + +### Prerequisite +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to +[Installation](/docs/installation). +Batch resource overcommitment and cpu suppress strategy must be enabled first, see this [manual](/docs/user-manuals/cpu-suppress) +for more details. + +| Component | Version Requirement | +| --- | ------- | +| Kubernetes | ≥v1.18 | +| koordinator | ≥v0.3.0 | + +The eviction strategy is provided by `Koordlet`, which is disabled by default in feature-gate. +Please make sure the `BECPUEvict=true` field has been added in the `-feature-gates` arguments of `Koordlet` +as the [example](https://github.com/koordinator-sh/charts/blob/main/versions/v1.2.0/templates/koordlet.yaml#L36)。 + +## Use Eviction Strategy base on CPU Satisfaction + +1. Create a configmap.yaml file based on the following ConfigMap content: + ```yaml + #ConfigMap slo-controller-config example。 + apiVersion: v1 + kind: ConfigMap + metadata: + name: slo-controller-config # name should be set as the configuration of koord-manager, e.g. ack-slo-config + namespace: koordinator-system # namespace should be set as the configuration of installation, e.g. kube-system + data: + # enable the eviction strategy base on CPU satisfaction + resource-threshold-config: | + { + "clusterStrategy": { + "enable": true, + "cpuEvictBESatisfactionLowerPercent": 60, + "cpuEvictBESatisfactionUpperPercent": 80, + "cpuEvictBEUsageThresholdPercent": 90, + "CPUEvictTimeWindowSeconds": 60 + } + } + ``` + + | Configuration item | Parameter | Valid values | Description | + | :-------------- | :------ | :-------- | :----------------------------------------------------------- | + | `enable` | Boolean | true; false | true:enable the eviction.; false(default): disable the eviction. | + | `cpuEvictBESatisfactionLowerPercent` | Int | 0~60 | eviction threshold percent of BE CPU satisfaction. BE pods will be evicted if the satisfaction less than the threshold. 
| + | `cpuEvictBESatisfactionUpperPercent` | Int | cpuEvictBESatisfactionLowerPercent~100 | threshold percent of BE CPU satisfaction. eviction will be stopped if the satisfaction greater than the threshold. | + | `cpuEvictBEUsageThresholdPercent` | Int | 0~100 | threshold of BE CPU utilization. Pods will be evicted if the BE utilization under the suppressed CPU greater than the threshold. default value is 90. | + | `cpuEvictTimeWindowSeconds` | Int | >=2 | time window by seconds during calculating the CPU satisfaction and BE CPU utilization. | + +2. Check whether a ConfigMap named `slo-controller-config` exists in the `koordinator-system` namespace. + + - If a ConfigMap named `slo-controller-config` exists, we commend that you run the kubectl patch command to update the ConfigMap. This avoids changing other settings in the ConfigMap. + + ```bash + kubectl patch cm -n koordinator-system slo-controller-config --patch "$(cat configmap.yaml)" + ``` + + - If no ConfigMap named `slo-controller-config` exists, run the kubectl patch command to create a ConfigMap named ack-slo-config: + + ```bash + kubectl apply -f configmap.yaml + ``` + +3. Create a file named be-pod-demo.yaml based on the following YAML content: + ```yaml + apiVersion: v1 + kind: Pod + metadata: + name: be-pod-demo + labels: + koordinator.sh/qosClass: 'BE' # set Pod QoS as BE + spec: + containers: + - args: + - '-c' + - '4' + - '--vm' + - '1' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: + limits: + kubernetes.io/batch-cpu: 4k + kubernetes.io/batch-memory: 4Gi + requests: + kubernetes.io/batch-cpu: 4k + kubernetes.io/batch-memory: 4Gi + restartPolicy: Always + schedulerName: default-scheduler + # priorityClassName is required when ColocationProfile enabled (default). + priorityClassName: koord-batch + ``` + +4. Run the following command to deploy the be-pod-demo pod in the cluster: + + ```bash + kubectl apply -f be-pod-demo.yaml + ``` + +5. Run the following command to check the be-pod-demo pod in Running state: + + ```bash + $ kubectl get pod be-pod-demo + NAME READY STATUS RESTARTS AGE + be-pod-demo 1/1 Running 0 7s + +6. Run the following command through [stress tool](https://linux.die.net/man/1/stress) +make sure the memory usage of node is above the threshold config, and the argument `--cpu` +means the process will consume 10 cores, this should be adjusted according to the node capacity. + + ```bash + $ stress --cpu 1 --vm 1 --vm-bytes 10G --vm-keep + ``` + +7. Check the running state of be-pod-demo, then you can find the be-pod-demo pod is not exist, +and the eviction information can be found in events. + + ```bash + $ kubectl get pod be-pod-demo + Error from server (NotFound): pods "be-pod-demo" not found + + $ kubectl get event + LAST SEEN TYPE REASON OBJECT MESSAGE + 44s Normal Killing pod/be-pod-demo Stopping container stress + 44s Warning evictPodSuccess ${your-pod-object} evict Pod:be-pod-demo, reason: EvictPodByBECPUSatisfaction, message: killAndEvictBEPodsRelease for node(${your-node-id}), need realase CPU : 1200 + ``` diff --git a/versioned_docs/version-v1.4/user-manuals/cpu-qos.md b/versioned_docs/version-v1.4/user-manuals/cpu-qos.md new file mode 100644 index 000000000..68ae0f798 --- /dev/null +++ b/versioned_docs/version-v1.4/user-manuals/cpu-qos.md @@ -0,0 +1,183 @@ +# CPU QoS + +## Introduction + +Kubernetes allows you to deploy various types of containerized applications on the same node. 
This causes applications with different priorities to compete for CPU resources. As a result, the performance of the applications with high priorities cannot be guaranteed. Koordinator allows you to use quality of service (QoS) classes to guarantee CPU resources for applications with high priorities. This topic describes how to configure the CPU QoS feature for pods. + +## Background + +To fully utilize computing resources, workloads of different priorities are usually deployed on the same node. For example, latency-sensitive (LS) workloads (with high priorities) and best-effort (BE) workloads (with low priorities) can be deployed on the same node. However, this may cause these workloads to compete for computing resources. In Kubernetes, CPU requests and CPU limits are used to control the amount of CPU resources that pods can use. However, pods may still compete for CPU resources. For example, BE pods and LS pods can share CPU cores or vCPU cores. When the loads of the BE pods increase, the performance of the LS pods is compromised. As a result, the response latency of the application that uses the LS pods increases. + +To reduce the performance impact on the BE pods in this scenario, you can use the CPU QoS feature provided by Koordinator to limit the CPU usage of the LS pods. The CPU QoS feature is based on Alibaba Cloud Linux 2 and Anolis OS. Koordinator allows you to use the group identity feature available in Alibaba Cloud Linux 2 to configure Linux scheduling priorities for pods. In an environment where both LS pods and BE pods are deployed, you can set the priority of LS pods to high and the priority of BE pods to low to avoid resource contention. The LS pods are prioritized to use the limited CPU resources to ensure the service quality of the corresponding application. For more information, see [Group identity feature](https://www.alibabacloud.com/help/en/elastic-compute-service/latest/group-identity-feature). + +You can gain the following benefits after you enable the CPU QoS feature: + +- The wake-up latency of tasks for LS workloads is minimized. +- Waking up tasks for BE workloads does not adversely impact the performance of LS pods. +- Tasks for BE workloads cannot use the simultaneous multithreading (SMT) scheduler to share CPU cores. This further reduces the impact on the performance of LS pods. + +## Setup + +### Prerequisites + +- Kubernetes >= 1.18 +- Koordinator >= 0.4 +- Operating System: + - Alibaba Cloud Linux 2(For more information, see [Group identity feature](https://www.alibabacloud.com/help/en/elastic-compute-service/latest/group-identity-feature)) + - Anolis OS >= 8.6 + - CentOS 7.9 (Need to install the CPU Co-location scheduler plug-in from OpenAnolis community, see [best practice](../best-practices/anolis_plugsched.md)). + +### Installation + +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to [Installation](https://koordinator.sh/docs/installation). + +## Use CPU QoS + +1. Create a configmap.yaml file based on the following ConfigMap content: + + ```yaml + # Example of the slo-controller-config ConfigMap. + apiVersion: v1 + kind: ConfigMap + metadata: + name: slo-controller-config + namespace: koordinator-system + data: + # Enable the CPU QoS feature. 
+ resource-qos-config: | + { + "clusterStrategy": { + "lsClass": { + "cpuQOS": { + "enable": true, + "groupIdentity": 2 + } + }, + "beClass": { + "cpuQOS": { + "enable": true, + "groupIdentity": -1 + } + } + } + } + ``` + + Specify `lsClass` and `beClass` to assign the LS and BE classes to different pods. `cpuQOS` includes the CPU QoS parameters. The following table describes the parameters. + +| Configuration item | Parameter | Valid values | Description | +| :----------------- | :-------- | :----------- | :----------------------------------------------------------- | +| `enable` | Boolean | truefalse | true: enables the CPU QoS feature for all containers in the cluster.false: disables the CPU QoS feature for all containers in the cluster. | +| `groupIdentity` | Int | -1~2 | Specify group identities for CPU scheduling. By default, the group identity of LS pods is 2 and the group identity of BE pods is -1. A value of 0 indicates that no group identity is assigned.A greater `group identity` value indicates a higher priority in CPU scheduling. For example, you can set `cpu.bvt_warp_ns=2` for LS pods and set `cpu.bvt_warp_ns=-1` for BE pods because the priority of LS pods is higher than that of BE pods. For more information, see [Group identity feature](https://www.alibabacloud.com/help/en/elastic-compute-service/latest/group-identity-feature#task-2129392). | + + **Note** If `koordinator.sh/qosClass` is not specified for a pod, Koordinator configures the pod based on the original QoS class of the pod. The component uses the BE settings in the preceding ConfigMap if the original QoS class is BE. The component uses the LS settings in the preceding ConfigMap if the original QoS class is not BE + +2. Check whether a ConfigMap named `slo-controller-config` exists in the `koordinator-system` namespace. + + - If a ConfigMap named `slo-controller-config` exists, we commend that you run the kubectl patch command to update the ConfigMap. This avoids changing other settings in the ConfigMap. + + ```bash + kubectl patch cm -n koordinator-system slo-controller-config --patch "$(cat configmap.yaml)" + ``` + + - If no ConfigMap named `slo-controller-config` exists, run the kubectl patch command to create a ConfigMap named ack-slo-config: + + ```bash + kubectl apply -f configmap.yaml + ``` + +3. Create a file named ls-pod-demo.yaml based on the following YAML content: + + ```yaml + apiVersion: v1 + kind: Pod + metadata: + name: ls-pod-demo + labels: + koordinator.sh/qosClass: 'LS' # Set the QoS class of the pod to LS + spec: + containers: + - command: + - "nginx" + - "-g" + - "daemon off; worker_processes 4;" + image: docker.io/koordinatorsh/nginx:v1.18-koord-example + imagePullPolicy: Always + name: nginx + resources: + limits: + cpu: "4" + memory: 10Gi + requests: + cpu: "4" + memory: 10Gi + restartPolicy: Never + schedulerName: default-scheduler + ``` + +4. Run the following command to deploy the ls-pod-demo pod in the cluster: + + ```bash + kubectl apply -f ls-pod-demo.yaml + ``` + +5. Run the following command to check whether the CPU group identity of the LS pod in the control group (cgroup) of the node takes effect: + + ```bash + cat /sys/fs/cgroup/cpu/kubepods.slice/kubepods-pod1c20f2ad****.slice/cpu.bvt_warp_ns + ``` + + Expected output: + + ```bash + #The group identity of the LS pod is 2 (high priority). + 2 + ``` + +6. 
Create a file named be-pod-demo.yaml based on the following content: + + ```yaml + apiVersion: v1 + kind: Pod + metadata: + name: be-pod-demo + labels: + koordinator.sh/qosClass: 'BE' # Set the QoS class of the pod to BE. + spec: + containers: + - args: + - '-c' + - '1' + - '--vm' + - '1' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + restartPolicy: Always + schedulerName: default-scheduler + # priorityClassName is required when ColocationProfile enabled (default). + priorityClassName: koord-batch + ``` + +7. Run the following command to deploy the be-pod-demo pod in the cluster: + + ```bash + kubectl apply -f be-pod-demo.yaml + ``` + +8. Run the following command to check whether the CPU group identity of the BE pod in the cgroup of the node takes effect: + + ```bash + cat /sys/fs/cgroup/cpu/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod4b6e96c8****.slice/cpu.bvt_warp_ns + ``` + + Expected output: + + ```bash + #The group identity of the BE pod is -1 (low priority). + -1 + ``` + + The output shows that the priority of the LS pod is high and the priority of the BE pod is low. CPU resources are preferably scheduled to the LS pod to ensure the service quality. diff --git a/versioned_docs/version-v1.4/user-manuals/cpu-suppress.md b/versioned_docs/version-v1.4/user-manuals/cpu-suppress.md new file mode 100644 index 000000000..56a5e1987 --- /dev/null +++ b/versioned_docs/version-v1.4/user-manuals/cpu-suppress.md @@ -0,0 +1,102 @@ +# CPU Utilization Threshold Management +## Introduction +In order to ensure the runtime quality of different workloads in co-located scenarios, Koordinator uses the CPU Suppress +mechanism provided by koordlet on the node side to suppress workloads of the Best Effort type when the load increases. +Or increase the resource quota for Best Effort type workloads when the load decreases. + +In the [Dynamic resource overcommitment model](/architecture/resource-model.md) that is provided by +Koordinator, the total amount of reclaimed resources dynamically changes based on the actual amount of resources used +by latency-sensitive (LS/LSR/LSE) pods. Reclaimed resources can be used by BE pods. You can use the dynamic resource +overcommitment feature to improve the resource utilization of a cluster by deploying both LS pods and BE pods in the +cluster. To ensure sufficient CPU resources for the LS pods on a node, you can use koordinator to limit the CPU +usage of the BE pods on the node. The elastic resource limit feature can maintain the resource utilization of a node +below a specified threshold and limit the amount of CPU resources that can be used by BE pods. This ensures the +stability of the containers on the node. + +CPU Threshold indicates the CPU utilization threshold of a node. Pod (LS).Usage indicates the CPU usage of LS pods. +CPU Restriction for BE indicates the CPU usage of BE pods. The amount of CPU resources that can be used by BE pods +is adjusted based on the increase or decrease of the CPU usage of LS pods. We recommend that you use the same value +for CPU Threshold and the reserved CPU watermark in the dynamic resource overcommitment model. +This ensures a consistent level of CPU resource utilization. + +![CPU-Suppress](/img/cpu-suppress-demo.svg) + +## Setup + +### Prerequisite + +- Kubernetes >= 1.18 +- Koordinator >= 0.6 + +### Installation + +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to +[Installation](/docs/installation). 
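+
+Before adjusting the threshold, it may help to keep in mind roughly how the BE CPU budget relates to it and to the LS usage described in the introduction above. The sketch below is a simplified view; the exact formula used by koordlet may differ in details such as how system usage is accounted.
+
+```
+suppress(BE) ≈ nodeTotalCPU * cpuSuppressThresholdPercent / 100 - usage(LS Pods) - usage(system)
+
+# Example: on a 100-core node with the default 65% threshold, 40 cores used by LS pods
+# and 5 cores used by system daemons leave roughly 65 - 40 - 5 = 20 cores for BE pods.
+```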
+ +### Configurations +When installing through the helm chart, the ConfigMap slo-controller-config will be created in the koordinator-system +namespace, and the CPU Suppress mechanism is enabled by default. If it needs to be closed, refer to the configuration +below, and modify the configuration of the resource-threshold-config section to take effect. + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: {{ .Values.installation.namespace }} +data: + ... + resource-threshold-config: | + { + "clusterStrategy": { + "enable": true, + "cpuSuppressThresholdPercent": 65 + } + } +``` + +#### (Optional) Advanced Settings +Also, the `CPU Suppress` feature allows you to configure the CPU utilization threshold in a fine-grained manner. +The following table describes the parameters. + +| Parameter | Data type | Valid value | Description | +| --------- | --------- | ----------- | ----------- | +| enable | Boolean | true; false | true: enables the elastic resource limit feature; false: disables the elastic resource limit feature. | +| cpuSuppressThresholdPercent | Int | 0~100 | The CPU utilization threshold of the node. Unit: %. Default value: 65. | + +## Use CPU Suppress + +1. Create a configmap.yaml file based on the following ConfigMap content: +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: koordinator-system +data: + # Enable the elastic resource limit feature. + resource-threshold-config: | + { + "clusterStrategy": { + "enable": true + } + } +``` + +2. Run the following command to update the ConfigMap. +To avoid changing other settings in the ConfigMap, we commend that you run the kubectl patch command to update the ConfigMap. + +```bash +kubectl patch cm -n koordinator-system slo-controller-config --patch "$(cat configmap.yaml)" +``` + +3. Run the following command to query the CPU cores that are allocated to the BE pods on the node: +```bash +cat /sys/fs/cgroup/cpuset/kubepods.slice/kubepods-besteffort.slice/cpuset.cpus +``` +Expected output: +```bash +10-25,35-51,62-77,87-103 +``` +The output shows that the following CPU cores are allocated to the BE pods on the node: 10-25, 35-51, 62-77, and 87-103, +which will be changed dynamically according to the load of latency-sensitve pods. \ No newline at end of file diff --git a/versioned_docs/version-v1.4/user-manuals/fine-grained-cpu-orchestration.md b/versioned_docs/version-v1.4/user-manuals/fine-grained-cpu-orchestration.md new file mode 100644 index 000000000..12dd76aeb --- /dev/null +++ b/versioned_docs/version-v1.4/user-manuals/fine-grained-cpu-orchestration.md @@ -0,0 +1,262 @@ +# CPU Orchestration + +Fine-grained CPU Orchestration is an ability of koord-scheduler for improving the performance of CPU-sensitive workloads. + +## Introduction + +There is an increasing number of systems that leverage a combination of CPUs and hardware accelerators to support +latency-critical execution and high-throughput parallel computation. A high-performance environment is expected in +plenty of applications including in telecommunications, scientific computing, machine learning, financial services, and +data analytics. + +However, pods in the Kubernetes cluster may interfere with others' running when they share the same physical resources +and both demand many resources. The sharing of CPU resources is almost inevitable. e.g. SMT threads (i.e. logical +processors) share execution units of the same core, and cores in the same chip share one last-level cache. 
The resource +contention can slow down the running of these CPU-sensitive workloads, resulting in high response latency (RT). + +To improve the performance of CPU-sensitive workloads, koord-scheduler provides a mechanism of fine-grained CPU +orchestration. It enhances the CPU management of Kubernetes and supports detailed NUMA-locality and CPU exclusions. + +For more information, please see [Design: Fine-grained CPU orchestration](/docs/designs/fine-grained-cpu-orchestration). + +## Setup + +### Prerequisite + +- Kubernetes >= 1.18 +- Koordinator >= 0.6 + +### Installation + +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to [Installation](/docs/installation). + +### Global Configuration via plugin args + +Fine-grained CPU orchestration is *Enabled* by default. You can use it without any modification on the koord-scheduler config. + +For users who need deep insight, please configure the rules of fine-grained CPU orchestration by modifying the ConfigMap +`koord-scheduler-config` in the helm chart. + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: koord-scheduler-config + ... +data: + koord-scheduler-config: | + apiVersion: kubescheduler.config.k8s.io/v1beta2 + kind: KubeSchedulerConfiguration + profiles: + - schedulerName: koord-scheduler + - pluginConfig: + - name: NodeNUMAResource + args: + apiVersion: kubescheduler.config.k8s.io/v1beta2 + kind: NodeNUMAResourceArgs + # The default CPU Binding Policy. The default is FullPCPUs + # If the Pod belongs to LSE/LSR Prod Pods, and if no specific CPU binding policy is set, + # the CPU will be allocated according to the default core binding policy. + defaultCPUBindPolicy: FullPCPUs + # the scoring strategy + scoringStrategy: + # the scoring strategy ('MostAllocated', 'LeastAllocated') + # - MostAllocated(default): prefer the node with the least available resources + # - LeastAllocated: prefer the node with the most available resources + type: MostAllocated + # the weights of each resource type + resources: + - name: cpu + weight: 1 + plugins: + # enable the NodeNUMAResource plugin + preFilter: + enabled: + - name: NodeNUMAResource + filter: + enabled: + - name: NodeNUMAResource + ... + score: + enabled: + - name: NodeNUMAResource + weight: 1 + ... + reserve: + enabled: + - name: NodeNUMAResource + preBind: + enabled: + - name: NodeNUMAResource +``` + +The koord-scheduler takes this ConfigMap as [scheduler Configuration](https://kubernetes.io/docs/reference/scheduling/config/). +New configurations will take effect after the koord-scheduler restarts. + +| Field | Description | Version | +|-------|-------------|---------| +| defaultCPUBindPolicy | The default CPU Binding Policy. The default is FullPCPUs. If the Pod belongs to LSE/LSR Prod Pods, and if no specific CPU binding policy is set, the CPU will be allocated according to the default CPU binding policy. The optional values are FullPCPUs and SpreadByPCPUs | >= v0.6.0 | +| scoringStrategy | the scoring strategy, including MostAllocated and LeastAllocated | >= v0.6.0 | + +### Configure by Node + +Users can set CPU binding policy and NUMA Node selection policy separately for Node. + +#### CPU bind policy + +The label `node.koordinator.sh/cpu-bind-policy` constrains how to bind CPU logical CPUs when scheduling. 
+The following is the specific value definition: + +| Value | Description | Version | +|-------|-------------|---------| +| None or empty value | does not perform any policy| >= v0.6.0 | +| FullPCPUsOnly | requires that the scheduler must allocate full physical cores. Equivalent to kubelet CPU manager policy option full-pcpus-only=true. | >= v0.6.0 | +| SpreadByPCPUs | requires that the schedler must evenly allocate logical CPUs across physical cores. | >= v1.1.0 | + +If there is no `node.koordinator.sh/cpu-bind-policy` in the node's label, it will be executed according to the policy configured by the Pod or koord-scheduler. + +#### NUMA allocate strategy + +The label `node.koordinator.sh/numa-allocate-strategy` indicates how to choose satisfied NUMA Nodes when scheduling. +The following is the specific value definition: + +| Value | Description | Version | +|-------|-------------|---------| +| MostAllocated | MostAllocated indicates that allocates from the NUMA Node with the least amount of available resource.| >= v.0.6.0 | +| LeastAllocated | LeastAllocated indicates that allocates from the NUMA Node with the most amount of available resource.| >= v.0.6.0 | + +If both `node.koordinator.sh/numa-allocate-strategy` and `kubelet.koordinator.sh/cpu-manager-policy` are defined, `node.koordinator.sh/numa-allocate-strategy` is used first. + +## Use Fine-grained CPU Orchestration + +1. Create an `nginx` deployment with the YAML file below. + +> Fine-grained CPU Orchestration allows pods to bind CPUs exclusively. To use fine-grained CPU orchestration, pods should set a label of [QoS Class](/docs/architecture/qos#definition)) and specify the cpu binding policy. + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-lsr + labels: + app: nginx-lsr +spec: + replicas: 3 + selector: + matchLabels: + app: nginx-lsr + template: + metadata: + name: nginx-lsr + labels: + app: nginx-lsr + koordinator.sh/qosClass: LSR # set the QoS class as LSR, the binding policy is FullPCPUs by default + # in v0.5, binding policy should be specified. + # e.g. to set binding policy as FullPCPUs (prefer allocating full physical CPUs of the same core): + #annotations: + #scheduling.koordinator.sh/resource-spec: '{"preferredCPUBindPolicy": "FullPCPUs"}' + spec: + schedulerName: koord-scheduler # use the koord-scheduler + containers: + - name: nginx + image: nginx + resources: + limits: + cpu: '2' + requests: + cpu: '2' + priorityClassName: koord-prod +``` + +2. Deploy the `nginx` deployment and check the scheduling result. + +```bash +$ kubectl create -f nginx-deployment.yaml +deployment/nginx-lsr created +$ kubectl get pods -o wide | grep nginx +nginx-lsr-59cf487d4b-jwwjv 1/1 Running 0 21s 172.20.101.35 node-0 +nginx-lsr-59cf487d4b-4l7r4 1/1 Running 0 21s 172.20.101.79 node-1 +nginx-lsr-59cf487d4b-nrb7f 1/1 Running 0 21s 172.20.106.119 node-2 +``` + +3. Check the CPU binding results of pods on `scheduling.koordinator.sh/resource-status` annotations. + +```bash +$ kubectl get pod nginx-lsr-59cf487d4b-jwwjv -o jsonpath='{.metadata.annotations.scheduling\.koordinator\.sh/resource-status}' +{"cpuset":"2,54"} +``` + +We can see that the pod `nginx-lsr-59cf487d4b-jwwjv` binds 2 CPUs, and the IDs are 2,54, which are the logical +processors of the **same** core. + +4. Change the binding policy in the `nginx` deployment with the YAML file below. 
+ +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-lsr + labels: + app: nginx-lsr +spec: + replicas: 3 + selector: + matchLabels: + app: nginx-lsr + template: + metadata: + name: nginx-lsr + labels: + app: nginx-lsr + koordinator.sh/qosClass: LSR # set the QoS class as LSR + annotations: + # set binding policy as SpreadByPCPUs (prefer allocating physical CPUs of different cores) + scheduling.koordinator.sh/resource-spec: '{"preferredCPUBindPolicy": "SpreadByPCPUs"}' + spec: + schedulerName: koord-scheduler # use the koord-scheduler + containers: + - name: nginx + image: nginx + resources: + limits: + cpu: '2' + requests: + cpu: '2' + priorityClassName: koord-prod +``` + +5. Update the `nginx` deployment and check the scheduling result. + +```bash +$ kubectl apply -f nginx-deployment.yaml +deployment/nginx-lsr created +$ kubectl get pods -o wide | grep nginx +nginx-lsr-7fcbcf89b4-rkrgg 1/1 Running 0 49s 172.20.101.35 node-0 +nginx-lsr-7fcbcf89b4-ndbks 1/1 Running 0 49s 172.20.101.79 node-1 +nginx-lsr-7fcbcf89b4-9v8b8 1/1 Running 0 49s 172.20.106.119 node-2 +``` + +6. Check the new CPU binding results of pods on `scheduling.koordinator.sh/resource-status` annotations. + +```bash +$ kubectl get pod nginx-lsr-7fcbcf89b4-rkrgg -o jsonpath='{.metadata.annotations.scheduling\.koordinator\.sh/resource-status}' +{"cpuset":"2-3"} +``` + +Now we can see that the pod `nginx-lsr-59cf487d4b-jwwjv` binds 2 CPUs, and the IDs are 2,3, which are the logical +processors of the **different** core. + +7. (Optional) Advanced configurations. + +```yaml + labels: + # koordinator QoS class of the pod. (use 'LSR' or 'LSE' for binding CPUs) + koordinator.sh/qosClass: LSR + annotations: + # `resource-spec` indicates the specification of resource scheduling, here we need to set `preferredCPUBindPolicy`. + # `preferredCPUBindPolicy` indicating the CPU binding policy of the pod ('None', 'FullPCPUs', 'SpreadByPCPUs') + # - None: perform no exclusive policy + # - FullPCPUs(default): a bin-packing binding policy, prefer allocating full physical cores (SMT siblings) + # - SpreadByPCPUs: a spread binding policy, prefer allocating logical cores (SMT threads) evenly across physical cores (SMT siblings) + scheduling.koordinator.sh/resource-spec: '{"preferredCPUBindPolicy": "FullPCPUs"}' +``` diff --git a/versioned_docs/version-v1.4/user-manuals/fine-grained-device-scheduling.md b/versioned_docs/version-v1.4/user-manuals/fine-grained-device-scheduling.md new file mode 100644 index 000000000..89c2e1975 --- /dev/null +++ b/versioned_docs/version-v1.4/user-manuals/fine-grained-device-scheduling.md @@ -0,0 +1,327 @@ +# Device Scheduling - GPU/RDMA +We provide a fine-grained mechanism for managing GPUs and other devices such as RDMA and FPGA, defines a set of APIs to +describe device information on nodes, including GPU, RDMA, and FPGA, and a new set of resource names to flexibly support +users to apply at a finer granularity GPU resources. This mechanism is the basis for subsequent other GPU scheduling +capabilities such as GPU Share, GPU Overcommitment, etc. + +## Introduction +GPU devices have very strong computing power, but are expensive. How to make better use of GPU equipment, give full play +to the value of GPU and reduce costs is a problem that needs to be solved. In the existing GPU allocation mechanism of +the K8s community, the GPU is allocated by the kubelet, and it is a complete device allocation. This method is simple +and reliable, but similar to the CPU and memory, the GPU will also be wasted. 
Therefore, some users expect to use only +a portion of the GPU's resources and share the rest with other workloads to save costs. Moreover, GPU has particularities. +For example, the NVLink and oversold scenarios supported by NVIDIA GPU mentioned below both require a central decision +through the scheduler to obtain globally optimal allocation results. + +## Setup + +### Prerequisite + +- Kubernetes >= 1.18 +- Koordinator >= 0.71 + +### Installation + +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to [Installation](/docs/installation). + +#### Runtime Requirements +The scheduled GPU devices are bound to the container requires support from the runtime environment. Currently, there are two solutions to achieve this: + +Runtime Environment | Installation +------------- | ------------- +Containerd >= 1.7.0
Koordinator >= 1.3 | Please make sure NRI is enabled in containerd. If not, please refer to [Enable NRI in Containerd](https://github.com/containerd/containerd/blob/main/docs/NRI.md) +others | Please make sure koord-runtime-proxy component is correctly installed in you cluser. If not, please refer to [Installation Runtime Proxy](installation-runtime-proxy). + + +### Configurations + +DeviceScheduling is *Enabled* by default. You can use it without any modification on the koord-scheduler config. + +## Use DeviceScheduling + +### Quick Start + +1.check device crd: + +```bash +$ kubectl get device host04 -o yaml +``` + +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: Device +metadata: + creationTimestamp: "2022-10-08T09:26:42Z" + generation: 1 + managedFields: + - apiVersion: scheduling.koordinator.sh/v1alpha1 + fieldsType: FieldsV1 + fieldsV1: + f:metadata: + f:ownerReferences: {} + f:spec: + .: {} + f:devices: {} + f:status: {} + manager: koordlet + operation: Update + time: "2022-10-08T09:26:42Z" + name: host04 + ownerReferences: + - apiVersion: v1 + blockOwnerDeletion: true + controller: true + kind: Node + name: host04 + uid: 09c4f912-6026-467a-85d2-6b2147c9557e + resourceVersion: "39011943" + selfLink: /apis/scheduling.koordinator.sh/v1alpha1/devices/host04 + uid: 5a498e1f-1357-4518-b74c-cab251d6c18c +spec: + devices: + - health: true + id: GPU-04cea5cd-966f-7116-1d58-1ac34421541b + minor: 0 + resources: + kubernetes.io/gpu-core: "100" + kubernetes.io/gpu-memory: 16Gi + kubernetes.io/gpu-memory-ratio: "100" + type: gpu + - health: true + id: GPU-3680858f-1753-371e-3c1a-7d8127fc7113 + minor: 1 + resources: + kubernetes.io/gpu-core: "100" + kubernetes.io/gpu-memory: 16Gi + kubernetes.io/gpu-memory-ratio: "100" + type: gpu +status: {} +``` +We can find this node has two gpu cards, we can find the detail info of each gpu card here. + +2.check node allocatable resource: + +```bash +$ kubectl get node host04 -o yaml +``` + +```yaml +apiVersion: v1 +kind: Node +metadata: + annotations: + flannel.alpha.coreos.com/backend-data: '{"VtepMAC":"5a:69:48:10:29:25"}' + creationTimestamp: "2022-08-29T09:12:55Z" + labels: + beta.kubernetes.io/os: linux + status: + addresses: + - address: 10.15.0.37 + type: InternalIP + - address: host04 + type: Hostname + allocatable: + cpu: "6" + ephemeral-storage: "200681483926" + kubernetes.io/gpu: "200" + kubernetes.io/gpu-core: "200" + kubernetes.io/gpu-memory: 32Gi + kubernetes.io/gpu-memory-ratio: "200" + memory: 59274552Ki + nvidia.com/gpu: "2" + pods: "220" + capacity: + cpu: "8" + kubernetes.io/gpu: "200" + kubernetes.io/gpu-core: "200" + kubernetes.io/gpu-memory: 32Gi + kubernetes.io/gpu-memory-ratio: "200" + memory: 61678904Ki + nvidia.com/gpu: "2" + pods: "220" +``` +We can find the node allocatable resource has merged each gpu card resource. 
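+
+If you only want to see the device-related part of the node status, a quick filter over `kubectl describe` can help. This is just a convenience sketch, not a Koordinator interface; the resource names are the ones shown in the node object above.
+
+```bash
+$ kubectl describe node host04 | sed -n '/Allocatable:/,/System Info:/p' | grep -i gpu
+```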
+ +3.apply pod: +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example + namespace: default +spec: + schedulerName: koord-scheduler + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: 40m + memory: 40Mi + requests: + cpu: 40m + memory: 40Mi + kubernetes.io/gpu: "100" + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always +``` + +```bash +$ kubectl get pod -n default pod-example -o yaml +``` + +```yaml +apiVersion: v1 +kind: Pod +metadata: + annotations: + scheduling.koordinator.sh/device-allocated: '{"gpu":[{"minor":0,"resources":{"kubernetes.io/gpu-core":"100","kubernetes.io/gpu-memory":"12508288Ki","kubernetes.io/gpu-memory-ratio":"100"}}]}' + creationTimestamp: "2022-10-08T09:33:07Z" + name: pod-example + namespace: default + resourceVersion: "39015044" + selfLink: /api/v1/namespaces/xlf/pods/gpu-pod7 + uid: 6bf1ac3c-0c9f-472a-8b86-de350bbfa795 +spec: + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: "1" + kubernetes.io/gpu: "100" + memory: 256Mi + requests: + cpu: "1" + kubernetes.io/gpu: "100" + memory: 256Mi +status: + conditions: + ... + hostIP: 10.0.0.149 + phase: Running + podIP: 10.244.2.45 + podIPs: + - ip: 10.244.2.45 + qosClass: Guaranteed + startTime: "2022-10-08T09:33:07Z" +``` +You can find the concrete device allocate result through annotation `scheduling.koordinator.sh/device-allocated`. + +4.more apply protocol: +```yaml +apiVersion: v1 +kind: Pod +... +spec: + ... + resources: + requests: + cpu: 40m + memory: 40Mi + nvidia.com/gpu: "100" +``` + +```yaml +apiVersion: v1 +kind: Pod +... +spec: + ... + resources: + requests: + cpu: 40m + memory: 40Mi + kubernetes.io/gpu-core: "100" + kubernetes.io/gpu-memory-ratio: "100" +``` + +```yaml +apiVersion: v1 +kind: Pod +... +spec: + ... 
+ resources: + requests: + cpu: 40m + memory: 40Mi + kubernetes.io/gpu-core: "100" + kubernetes.io/gpu-memory: "16Mi" +``` + +4.device resource debug api: +```bash +$ kubectl -n koordinator-system get lease koord-scheduler --no-headers | awk '{print $2}' | cut -d'_' -f1 | xargs -I {} kubectl -n koordinator-system get pod {} -o wide --no-headers | awk '{print $6}' + 10.244.0.64 + +$ curl 10.244.0.64:10251/apis/v1/plugins/DeviceShare/nodeDeviceSummaries +$ curl 10.244.0.64:10251/apis/v1/plugins/DeviceShare/nodeDeviceSummaries/host04 +``` + +```json +{ + "allocateSet": { + "gpu": { + "xlf/gpu-pod7": { + "0": { + "kubernetes.io/gpu-core": "100", + "kubernetes.io/gpu-memory": "12508288Ki", + "kubernetes.io/gpu-memory-ratio": "100" + } + } + } + }, + "deviceFree": { + "kubernetes.io/gpu-core": "0", + "kubernetes.io/gpu-memory": "0", + "kubernetes.io/gpu-memory-ratio": "0" + }, + "deviceFreeDetail": { + "gpu": { + "0": { + "kubernetes.io/gpu-core": "0", + "kubernetes.io/gpu-memory": "0", + "kubernetes.io/gpu-memory-ratio": "0" + } + } + }, + "deviceTotal": { + "kubernetes.io/gpu-core": "100", + "kubernetes.io/gpu-memory": "12508288Ki", + "kubernetes.io/gpu-memory-ratio": "100" + }, + "deviceTotalDetail": { + "gpu": { + "0": { + "kubernetes.io/gpu-core": "100", + "kubernetes.io/gpu-memory": "12508288Ki", + "kubernetes.io/gpu-memory-ratio": "100" + } + } + }, + "deviceUsed": { + "kubernetes.io/gpu-core": "100", + "kubernetes.io/gpu-memory": "12508288Ki", + "kubernetes.io/gpu-memory-ratio": "100" + }, + "deviceUsedDetail": { + "gpu": { + "0": { + "kubernetes.io/gpu-core": "100", + "kubernetes.io/gpu-memory": "12508288Ki", + "kubernetes.io/gpu-memory-ratio": "100" + } + } + } +} +``` diff --git a/versioned_docs/version-v1.4/user-manuals/gang-scheduling.md b/versioned_docs/version-v1.4/user-manuals/gang-scheduling.md new file mode 100644 index 000000000..6a1ddc371 --- /dev/null +++ b/versioned_docs/version-v1.4/user-manuals/gang-scheduling.md @@ -0,0 +1,364 @@ +# GangScheduling + +## Introduction +We provide Gang mechanism for the scheduler to control pods binding opportunity. User can declare a resource-collection-minimum number, +only when assigned-resources reach the given limitation can trigger the binding. We provide `Strict` and `NonStrict` to +control the resource-accumulation-process by a configuration. We also provide a two-level Gang description for better matching +the real scenario, which is different from community. + +## Setup + +### Prerequisite + +- Kubernetes >= 1.18 +- Koordinator >= 0.70 + +### Installation + +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to [Installation](/docs/installation). + +### Configurations + +GangScheduling is *Enabled* by default. You can use it without any modification on the koord-scheduler config. 
+ +## Use GangScheduling + +### Quick Start + +#### apply gang through gang crd +1.create pod-group +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: PodGroup +metadata: + name: gang-example + namespace: default +spec: + scheduleTimeoutSeconds: 100 + minMember: 2 +``` + +```bash +$ kubectl get pgs -n default + NAME AGE + gang-example 13s +``` + +2.create child pod1 +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example1 + namespace: default + labels: + pod-group.scheduling.sigs.k8s.io: gang-example +spec: + schedulerName: koord-scheduler + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: 40m + memory: 40Mi + requests: + cpu: 40m + memory: 40Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always +``` + +```bash +$ kubectl get pod -n default + NAME READY STATUS RESTARTS AGE + pod-example1 0/1 Pending 0 7s +``` + +3.create child pod2 +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example2 + namespace: default + labels: + pod-group.scheduling.sigs.k8s.io: gang-example +spec: + schedulerName: koord-scheduler + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: 40m + memory: 40Mi + requests: + cpu: 40m + memory: 40Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always +``` + +```bash +$ kubectl get pod -n default + NAME READY STATUS RESTARTS AGE + pod-example1 1/1 Running 0 53s + pod-example2 1/1 Running 0 5s +``` + +```bash +$ kubectl get pg gang-example -n default -o yaml +``` + +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: PodGroup +metadata: + creationTimestamp: "2022-10-09T09:08:17Z" + generation: 6 +spec: + minMember: 1 + scheduleTimeoutSeconds: 100 +status: + phase: Running + running: 2 + scheduled: 2 +``` + +#### apply gang through annotation +1.create child pod1 +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example1 + namespace: default + annotations: + gang.scheduling.koordinator.sh/name: "gang-example" + gang.scheduling.koordinator.sh/min-available: "2" +spec: + schedulerName: koord-scheduler + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: 40m + memory: 40Mi + requests: + cpu: 40m + memory: 40Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always +``` + +```bash +$ kubectl get pod -n default + NAME READY STATUS RESTARTS AGE + pod-example1 0/1 Pending 0 7s +``` + +2.create child pod2 +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example2 + namespace: default + annotations: + gang.scheduling.koordinator.sh/name: "gang-example" + gang.scheduling.koordinator.sh/min-available: "2" +spec: + schedulerName: koord-scheduler + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: 40m + memory: 40Mi + requests: + cpu: 40m + memory: 40Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always +``` + +```bash +$ kubectl get pod -n default + NAME READY STATUS RESTARTS AGE + pod-example1 1/1 Running 0 53s + pod-example2 1/1 Running 0 5s +``` + +```bash +$ kubectl get pg gang-example -n default -o yaml +``` + +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: 
PodGroup +metadata: + creationTimestamp: "2022-10-09T09:08:17Z" + generation: 6 +spec: + minMember: 1 + scheduleTimeoutSeconds: 100 +status: + phase: Running + running: 2 + scheduled: 2 +``` + +#### device resource debug api: +```bash +$ kubectl -n koordinator-system get lease koord-scheduler --no-headers | awk '{print $2}' | cut -d'_' -f1 | xargs -I {} kubectl -n koordinator-system get pod {} -o wide --no-headers | awk '{print $6}' + 10.244.0.64 + +$ curl 10.244.0.64:10251/apis/v1/plugins/Coscheduling/gang/default/gang-example +``` + +```json +{ + "boundChildren": { + "default/pod-example1": {}, + "default/pod-example2": {} + }, + "children": { + "default/pod-example1": {}, + "default/pod-example2": {} + }, + "childrenScheduleRoundMap": { + "default/pod-example1": 2, + "default/pod-example2": 2 + }, + "createTime": "2022-10-09T07:31:53Z", + "gangFrom": "GangFromPodAnnotation", + "gangGroup": null, + "hasGangInit": true, + "minRequiredNumber": 2, + "mode": "Strict", + "name": "default/gang-example", + "onceResourceSatisfied": true, + "scheduleCycle": 2, + "scheduleCycleValid": true, + "totalChildrenNum": 2, + "waitTime": 600000000000, + "waitingForBindChildren": {} +} +``` + +#### advanced configuration for gang +1.apply through pod-group. + +```yaml +apiVersion: scheduling.sigs.k8s.io/v1alpha1 +kind: PodGroup +metadata: + name: gang-example1 + namespace: default + annotations: + gang.scheduling.koordinator.sh/total-number: "3" + gang.scheduling.koordinator.sh/mode: "NonStrict" + gang.scheduling.koordinator.sh/groups: "[\"default/gang-example1\", \"default/gang-example2\"]" + +spec: + scheduleTimeoutSeconds: 100 + minMember: 2 + +``` + +- `gang.scheduling.koordinator.sh/total-number` specifies the total children number of the gang. If not specified,it will be set with the `minMember` +- `gang.scheduling.koordinator.sh/mode` defines the Gang Scheduling operation when failed scheduling. Support `Strict\NonStrict`, default is `Strict` +- `gang.scheduling.koordinator.sh/groups` defines which gangs are bundled as a group. The gang will go to bind only all gangs in one group meet the conditions + +2.apply through pod annotations. +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-example2 + namespace: default + annotations: + gang.scheduling.koordinator.sh/name: "gang-example1" + gang.scheduling.koordinator.sh/min-available: "2" + gang.scheduling.koordinator.sh/total-number: "3" + gang.scheduling.koordinator.sh/mode: "Strict\NonStrict" + gang.scheduling.koordinator.sh/groups: "[\"default/gang-example1\", \"default/gang-example2\"]" + gang.scheduling.koordinator.sh/waiting-time: "100s" +spec: + schedulerName: koord-scheduler + containers: + - command: + - sleep + - 365d + image: busybox + imagePullPolicy: IfNotPresent + name: curlimage + resources: + limits: + cpu: 40m + memory: 40Mi + requests: + cpu: 40m + memory: 40Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always +``` +- `gang.scheduling.koordinator.sh/total-number` specifies the total children number of the gang. If not specified,it will be set with the `gang.scheduling.koordinator.sh/min-available` +- `gang.scheduling.koordinator.sh/mode` defines the Gang Scheduling operation when failed scheduling. Support `Strict\NonStrict`, default is `Strict` +- `gang.scheduling.koordinator.sh/groups` defines which gangs are bundled as a group. 
The gang will go to bind only all gangs in one group meet the conditions +- `gang.scheduling.koordinator.sh/waiting-time` specifies gang's max wait time in Permit Stage. + +#### advanced configuration for scheduler +you can modify `koord-scheduler-config.yaml` in helm to adjust `Coscheduling` configuration as below: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: koord-scheduler-config + namespace: {{ .Values.installation.namespace }} +data: + koord-scheduler-config: | + apiVersion: kubescheduler.config.k8s.io/v1beta2 + kind: KubeSchedulerConfiguration + leaderElection: + leaderElect: true + resourceLock: leases + resourceName: koord-scheduler + resourceNamespace: {{ .Values.installation.namespace }} + profiles: + - pluginConfig: + - name: Coscheduling + args: + apiVersion: kubescheduler.config.k8s.io/v1beta2 + kind: CoschedulingArgs` + defaultTimeout: 600s + controllerWorkers: 1 + - name: ElasticQuota + ... +``` + diff --git a/versioned_docs/version-v1.4/user-manuals/host-application-qos.md b/versioned_docs/version-v1.4/user-manuals/host-application-qos.md new file mode 100644 index 000000000..03cb2eb64 --- /dev/null +++ b/versioned_docs/version-v1.4/user-manuals/host-application-qos.md @@ -0,0 +1,131 @@ +# QoS Management for Out-of-Band Applications on Host + +## Introduction +In a production environment, there could be more than just containerized applications managed by Kubernetes, but also +out-of-band applications running on hosts. Koordinator has supported +[node resources reservation](https://github.com/koordinator-sh/koordinator/blob/main/docs/proposals/scheduling/20221227-node-resource-reservation.md) +so that koord-scheduler can take them into account during allocation. However, these applications also have various QoS +level during runtime such as LS or BE. So, as for the QoS management, koordlet should also support setting QoS parameters +for these processes running on hosts. For example the out-of-band processes could be latency-sensitive types, and koordlet +should set them as high-priority in case they got interference for BE pods. + +Since most QoS strategies relies on cgroup mechanism, koordlet requires these applications must running under its own +cgroup if they need the QoS management. + +![image](/img/host-application.svg) + +Here are the supported QoS levels and strategies for out-of-band applications. +- LS (Latency Sensitive) + - CPU QoS(Group Identity): applications must run with cpu cgroup subsystem, and `koordlet` will set cpu.bvt_warp_ns according to the `resource-qos-config`. + - CPUSet Allocation: applications must run with cpuset cgroup subsystem, and `koorldet` will set **all cpus** in share-pools for them. + +- BE (Best-effort) + - CPU QoS(Group Identity): applications must run with cpu cgroup subsystem, and `koorldet` will set cpu.bvt_warp_ns according to the `resource-qos-config`. + +## Prerequisite +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to +[Installation](/docs/installation). + +Host applications should already run with cgroup, see the [kernel manual](https://docs.kernel.org/admin-guide/cgroup-v1/cgroups.html) +for more details. + +| Component | Version Requirement | +| --- | ------- | +| Kubernetes | ≥v1.18 | +| koordinator | ≥v1.4.0 | + +## Use QoS management for out-of-band applications on host + +1. Run host application under cgroup `host-latency-sensitive/nginx/` with cpu and cpuset subsystem. 
It should be noted +that `cpuset.cpus` and `cpuset.mems` of each level must be initialized with manually, which could be equal to the cgroup root path. +```shell script +# init cgroup dir on cgroup v1 +$ mkdir -p /sys/fs/cgroup/cpuset/host-latency-sensitive/nginx/ +$ mkdir -p /sys/fs/cgroup/cpu/host-latency-sensitive/nginx/ +$ cat /sys/fs/cgroup/cpuset/cpuset.cpus > /sys/fs/cgroup/cpuset/host-latency-sensitive/cpuset.cpus +$ cat /sys/fs/cgroup/cpuset/cpuset.cpus > /sys/fs/cgroup/cpuset/host-latency-sensitive/nginx/cpuset.cpus +$ cat /sys/fs/cgroup/cpuset/cpuset.mems > /sys/fs/cgroup/cpuset/host-latency-sensitive/cpuset.mems +$ cat /sys/fs/cgroup/cpuset/cpuset.mems > /sys/fs/cgroup/cpuset/host-latency-sensitive/nginx/cpuset.mems + +# bind application to corresponding cgroups +$ echo ${your-application-pids} > /sys/fs/cgroup/cpuset/host-latency-sensitive/nginx/tasks +$ echo ${your-application-pids} > /sys/fs/cgroup/cpu/host-latency-sensitive/nginx/tasks +``` + +2. Create a configmap file base on the following ConfigMap content: +```yaml +apiVersion: v1 +data: + host-application-config: | + { + "applications": [ + { + "name": "nginx", + "qos": "LS", + "cgroupPath": { + "base": "CgroupRoot", + "parentDir": "host-latency-sensitive/", + "relativePath": "nginx/" + } + } + ] + } + resource-qos-config: | + { + "clusterStrategy": { + "lsClass": { + "cpuQOS": { + "enable": true, + "groupIdentity": 2 + } + }, + "beClass": { + "cpuQOS": { + "enable": true, + "groupIdentity": -1 + } + } + } + } +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: koordinator-system +``` + + | Configuration item | Parameter | Valid values | Description | + | :-------------- | :------ | :-------- | :----------------------------------------------------------- | + | `applications` | []HostApplicationSpec | N/A | spec description of host applications. | + | `name` | String | N/A | name of the host application. | + | `qos` | String | LS/BE | QoS class of the application. | + | `cgroupPath` | CgroupPath | N/A | cgroup path of the application, the directory equals to `${base}/${parentDir}/${relativePath}`。 | + | `cgroupPath.base` | String | CgroupRoot/Kubepods/KubepodsBurstable/KubepodsBesteffort | cgroup base dir of the application, the format is various across cgroup drivers. | + | `cgroupPath.parentDir` | String | N/A | cgroup parent path under base dir. By default it is "host-latency-sensitive/" for LS and "host-latency-sensitive/" for BE. | + | `cgroupPath.relativePath` | String | N/A | cgroup relative path under parent dir. | + +3. Check whether a ConfigMap named `slo-controller-config` exists in the `koordinator-system` namespace. + + - If a ConfigMap named `slo-controller-config` exists, we commend that you run the kubectl patch command to update the ConfigMap. This avoids changing other settings in the ConfigMap. + + ```bash + kubectl patch cm -n koordinator-system slo-controller-config --patch "$(cat configmap.yaml)" + ``` + + - If no ConfigMap named `slo-controller-config` exists, run the kubectl patch command to create a ConfigMap named ack-slo-config: + + ```bash + kubectl apply -f configmap.yaml + ``` + +4. Check the cgroup value of host application, then you can find the content of `cpu.bvt_warp_ns` equals to the LS class, +and the cpuset.cpus equals to the LS CPU share pool. 
+```shell script +$ cat /sys/fs/cgroup/cpu/host-latency-sensitive/nginx/cpu.bvt_warps_ns +$ 2 + +$ cat /sys/fs/cgroup/cpuset/host-latency-sensitive/nginx/cpuset.cpus +$ 1-5,8-23,32-47,50-51,53,56-71,80-103 + +$ kubectl get noderesourcetopology ${your-node-id} -o yaml | grep node.koordinator.sh/cpu-shared-pools + node.koordinator.sh/cpu-shared-pools: '[{"socket":0,"node":0,"cpuset":"1-5,8-23,53,56-71"},{"socket":1,"node":1,"cpuset":"32-47,50-51,80-103"}]' +``` \ No newline at end of file diff --git a/versioned_docs/version-v1.4/user-manuals/installation-runtime-proxy.md b/versioned_docs/version-v1.4/user-manuals/installation-runtime-proxy.md new file mode 100644 index 000000000..3bec465d9 --- /dev/null +++ b/versioned_docs/version-v1.4/user-manuals/installation-runtime-proxy.md @@ -0,0 +1,78 @@ +# Installation Runtime Proxy + +koord-runtime-proxy acts as a proxy between kubelet and containerd(dockerd under dockershim scenario), which is designed to intercept CRI request, and apply some resource management policies, such as setting different cgroup parameters by pod priorities under hybrid workload orchestration scenario, applying new isolation policies for latest Linux kernel, CPU architecture, and etc. +For pods that do not want hook servers processing (such as addon pods), you can skip them by adding `runtimeproxy.koordinator.sh/skip-hookserver=true` to the pod label. + +Currently known features that require RuntimeProxy include: + +- [GPU Share env](fine-grained-device-scheduling) +- set cpuset before container starting +- set rdt before container starting + +These features will be based on NRI alternatives in the future, ** if you don't know what you are doing, please do not install this component **. + +## 1、Get binary + +Download from github releases: +```bash +$ # select the version +$ wget https://github.com/koordinator-sh/koordinator/releases/download/v1.3.0/koord-runtime-proxy_1.3.0_linux_x86_64 -O koord-runtime-proxy +$ chmod +x koord-runtime-proxy +``` + +Or you can build from source: +```bash +$ git clone https://github.com/koordinator-sh/koordinator.git +$ cd koordinator +$ make build-koord-runtime-proxy +``` + +## 2、Setup koord-runtime-proxy + +Firstly, please make sure your runtime backend is containerd or dockerd. + +Under containerd scenario, if your containerd listening CRI request on default `/var/run/containerd/containerd.sock`, koord-runtime-proxy can be setup by(no need to set any parameters): + +``` +koord-runtime-proxy +``` + +Or koord-runtime-proxy can be setup with command: + +``` +koord-runtime-proxy \ + --remote-runtime-service-endpoint= \ + --remote-image-service-endpoint= +``` + +Under docker scenario, koord-runtime-proxy should be setup with the additional parameter `--backend-runtime-mode Docker`, and without `remote-image-service-endpoint`: + +``` +koord-runtime-proxy \ + --backend-runtime-mode=Docker \ + --remote-runtime-service-endpoint= +``` + +koord-runtime-proxy will listen on `/var/run/koord-runtimeproxy/runtimeproxy.sock`. 
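+Before switching kubelet over to the proxy, it can be useful to confirm that the socket actually answers CRI requests. The following is only an optional sanity check, not part of the component itself; it assumes `crictl` is installed on the node and uses the default socket path listed above:
+
+```bash
+# Query the runtime version through koord-runtime-proxy; a successful reply
+# means requests are being forwarded to the backend runtime.
+$ crictl --runtime-endpoint unix:///var/run/koord-runtimeproxy/runtimeproxy.sock version
+```
+
+If the command fails, check that koord-runtime-proxy is running and that the backend runtime endpoint passed to it is correct.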
+ +## 3、Setup Kubelet + +To make koord-runtime-proxy a proxy between kubelet and containerd, kubelet parameters should be altered as shown below: + +``` +# If the kubelet version is less than 1.24: +kubelet \ + --container-runtime=remote \ + --container-runtime-endpoint=unix:///var/run/koord-runtimeproxy/runtimeproxy.sock + +# If the kubelet version is greater than or equal to 1.24: +kubelet \ + --container-runtime-endpoint=unix:///var/run/koord-runtimeproxy/runtimeproxy.sock +``` + +Under docker scenario, to make koord-runtime-proxy a proxy between kubelet and dockerd, kubelet parameters should be altered as shown below: + +``` +kubelet --docker-endpoint=unix:///var/run/koord-runtimeproxy/runtimeproxy.sock +``` + diff --git a/versioned_docs/version-v1.4/user-manuals/load-aware-descheduling.md b/versioned_docs/version-v1.4/user-manuals/load-aware-descheduling.md new file mode 100644 index 000000000..0f7de1951 --- /dev/null +++ b/versioned_docs/version-v1.4/user-manuals/load-aware-descheduling.md @@ -0,0 +1,229 @@ +# Load Aware Descheduling + +The load-aware scheduling supported in the scheduler can select nodes with lower loads to run new Pods, but as time passes, the cluster environment changes and the traffic/requests faced by workloads change, so node utilization changes dynamically. These changes break the original load balance between nodes in the cluster, and extreme load imbalance may even occur, affecting the runtime quality of the workloads. + +koord-descheduler perceives changes in the load of nodes in the cluster and automatically optimizes nodes that exceed the safety load to prevent extreme load imbalance. + +## Introduction + +The LowNodeLoad plugin in the koord-descheduler is responsible for sensing node load and reducing load hotspots by evicting/migrating Pods. The `LowNodeLoad` plugin differs from the Kubernetes native descheduler plugin LowNodeUtilization in that `LowNodeLoad` decides to deschedule based on the actual utilization of nodes, while LowNodeUtilization decides to deschedule based on the resource allocation. + +The `LowNodeLoad` plugin has two most important parameters: + +- `highThresholds` defines the target usage threshold of resources. The Pods on nodes exceeding this threshold will be evicted/migrated. +- `lowThresholds` defines the low usage threshold of resources. The Pods on nodes below this threshold will not be evicted/migrated. + +Take the following figure as an example: `lowThresholds` is 45% and `highThresholds` is 70%, so we can classify nodes into three categories: + +1. Idle Node. Nodes with resource utilization below lowThresholds(45%); +2. Normal Node. Nodes whose resource utilization is higher than lowThresholds but lower than highThresholds(70%); this load level range is the reasonable range we expect. +3. Hotspot Node. If the node resource utilization is higher than highThresholds, the node is judged as unsafe and belongs to the hotspot nodes, and some Pods should be evicted to reduce the load level so that it does not exceed 70%. + +![image](/img/low-node-load.png) + +After identifying which nodes are hotspots, descheduler will perform an eviction/migration operation to evict some Pods from hotspot nodes to idle nodes. If the number of `Idle Node` is 0 or the number of `Hotspot Node` is 0, the descheduler does nothing. + +If the total number of idle nodes in a cluster is small, descheduling will be terminated. 
This can be helpful in large clusters where some nodes may be underutilized frequently or for short periods of time. By default, `numberOfNodes` is set to zero. This capability can be enabled by setting the parameter `numberOfNodes`. +Before migration, descheduler will calculate the actual free capacity to ensure that the sum of the actual utilization of the Pods to be migrated does not exceed the total free capacity in the cluster. These actual free capacities come from idle nodes, and the actual free capacity of an idle node = `(highThresholds - current load of the node) * total capacity of the node`. Suppose the load level of node A is 20%, the highThresholds is 70%, and the total CPU of node A is 96C, then `(70%-20%) * 96 = 48C`, and this 48C is the free capacity that can be carried. + +In addition, when migrating hotspot nodes, the Pods on the nodes will be filtered. Currently, descheduler supports multiple filtering parameters, which can avoid migration and expulsion of very important Pods: + +- Filter by namespace. Can be configured to filter only certain namespaces or filter out certain namespaces +- Filter by pod selector. Pods can be filtered out through the label selector, or Pods with certain Labels can be excluded +- Configure `nodeFit` to check whether the scheduling rules have candidate nodes. When enabled, descheduler checks whether there is a matching Node in the cluster according to the Node Affinity/Node Selector/Toleration corresponding to the candidate Pod. If not, the Pod will not be evicted for migration. If you set `nodeFit` to false, the migration controller in the descheduler will complete the capacity reservation at this time, and start the migration after ensuring that there are resources. + +After the Pods are filtered out, these Pods are sorted from multiple dimensions such as QoSClass, Priority, actual usage, and creation time. + +After pods have been filtered and sorted, the migration operation begins. Before migration, it will check whether the remaining free capacity is satisfied and whether the load the current node is higher than the target safety threshold. If one of these two conditions cannot be met, descheduling will stop. Every time a Pod is migrated, the remaining free capacity will be withheld, and the load of the current node will be adjusted at the same time until the remaining capacity is insufficient or the load reaches the safety threshold. + +## Setup + +### Prerequisite + +- Kubernetes >= 1.18 +- Koordinator >= 1.1.1 + +### Installation + +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to [Installation](/docs/installation). + +### Global Configuration via plugin args + +Load-aware descheduling is *Disabled* by default. You can modify the ConfigMap `koord-descheduler-config` to enable the plugin. + +For users who need deep insight, please configure the rules of load-aware descheduling by modifying the ConfigMap +`koord-descheduler-config` in the helm chart. New configurations will take effect after the koord-descheduler restarts. + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: koord-descheduler-config + ... +data: + koord-descheduler-config: | + apiVersion: descheduler/v1alpha2 + kind: DeschedulerConfiguration + ... + # Execute the LowNodeLoad plugin every 60s + deschedulingInterval: 60s + profiles: + - name: koord-descheduler + plugins: + deschedule: + disabled: + - name: "*" + balance: + enabled: + - name: LowNodeLoad # Configure to enable the LowNodeLoad plugin + .... 
+ pluginConfig: + - name: LowNodeLoad + args: + apiVersion: descheduler/v1alpha2 + kind: LowNodeLoadArgs + evictableNamespaces: + # include and exclude are mutually exclusive, only one of them can be configured. + # include indicates that only the namespace configured below will be processed + # include: + # - test-namespace + # exclude means to only process namespaces other than those configured below + exclude: + - "kube-system" + - "koordinator-system" + # lowThresholds defines the low usage threshold of resources + lowThresholds: + cpu: 20 + memory: 30 + # highThresholds defines the target usage threshold of resources + highThresholds: + cpu: 50 + memory: 60 + .... +``` + +| Field | Description | Version | +|-------|-------------| --------| +| paused | Paused indicates whether the LowNodeLoad should to work or not. | >= v1.1.1 | +| dryRun | DryRun means only execute the entire deschedule logic but don't migrate Pod | >= v1.1.1 | +| numberOfNodes | NumberOfNodes can be configured to activate the strategy only when the number of under utilized nodes are above the configured value. This could be helpful in large clusters where a few nodes could go under utilized frequently or for a short period of time. By default, NumberOfNodes is set to zero. | >= v1.1.1 | +| evictableNamespaces | Naming this one differently since namespaces are still considered while considering resources used by pods but then filtered out before eviction. | >= v1.1.1 | +| nodeSelector | NodeSelector selects the nodes that matched labelSelector. | >= v1.1.1 | +| podSelectors | PodSelectors selects the pods that matched labelSelector. | >= v1.1.1 | +| nodeFit | NodeFit if enabled, it will check whether the candidate Pods have suitable nodes, including NodeAffinity, TaintTolerance, and whether resources are sufficient. By default, NodeFit is set to true. | >= v1.1.1 | +| useDeviationThresholds | If UseDeviationThresholds is set to `true`, the thresholds are considered as percentage deviations from mean resource usage. `lowThresholds` will be deducted from the mean among all nodes and `highThresholds` will be added to the mean. A resource consumption above (resp. below) this window is considered as overutilization (resp. underutilization). | >= v1.1.1 | +| highThresholds | HighThresholds defines the target usage threshold of resources | >= v1.1.1 | +| lowThresholds | LowThresholds defines the low usage threshold of resources | >= v1.1.1 | + +## Use Load Aware Descheduling + +The example cluster in this article has three 4-core 16GiB nodes. + +1. Deploy two `stress` pod with the YAML file below. + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: stress-demo + namespace: default + labels: + app: stress-demo +spec: + replicas: 2 + selector: + matchLabels: + app: stress-demo + template: + metadata: + name: stress-demo + labels: + app: stress-demo + spec: + containers: + - args: + - '--vm' + - '2' + - '--vm-bytes' + - '1600M' + - '-c' + - '2' + - '--vm-hang' + - '2' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: + limits: + cpu: '2' + memory: 4Gi + requests: + cpu: '2' + memory: 4Gi + restartPolicy: Always + schedulerName: koord-scheduler # use the koord-scheduler +``` + +```bash +$ kubectl create -f stress-demo.yaml +deployment.apps/stress-demo created +``` + +2. Watch the pod status util they become running. 
+ +```bash +$ kubectl get pod -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +stress-demo-7fdd89cc6b-lml7k 1/1 Running 0 21m 10.0.2.83 cn-beijing.10.0.2.54 +stress-demo-7fdd89cc6b-xr5dl 1/1 Running 0 4m40s 10.0.2.77 cn-beijing.10.0.2.53 +``` + +The stress pods are scheduled on `cn-beijing.10.0.2.53` and `cn-beijing.10.0.2.54`. + +3. Check the load of each node. + +```bash +$ kubectl top node +NAME CPU(cores) CPU% MEMORY(bytes) MEMORY% +cn-beijing.10.0.2.53 3825m 98% 4051Mi 31% +cn-beijing.10.0.2.54 2155m 55% 4500Mi 35% +cn-beijing.10.0.2.58 182m 4% 1367Mi 10% +``` + +In above order, `cn-beijing.10.0.2.53` and `cn-beijing.10.0.2.54` have the highest load, while `cn-beijing.10.0.2.58` has the lowest load. + +4. Update `koord-descheduler-config` to enable `LowNodeLoad` plugin. + +5. Observe the Pod changes and wait for the koord-descheduler to execute the eviction/migration operation. + +```bash +$ kubectl get pod -w +NAME READY STATUS RESTARTS AGE +stress-demo-7fdd89cc6b-lml7k 1/1 Running 0 22m +stress-demo-7fdd89cc6b-xr5dl 1/1 Running 0 5m45s +stress-demo-7fdd89cc6b-xr5dl 1/1 Terminating 0 5m59s +stress-demo-7fdd89cc6b-8k8wq 0/1 Pending 0 0s +stress-demo-7fdd89cc6b-8k8wq 0/1 Pending 0 0s +stress-demo-7fdd89cc6b-8k8wq 0/1 ContainerCreating 0 0s +stress-demo-7fdd89cc6b-8k8wq 0/1 ContainerCreating 0 1s +stress-demo-7fdd89cc6b-8k8wq 1/1 Running 0 3s +``` + +5. Observe the Event, you can see the following migration records + +```bash +$ kubectl get event |grep stress-demo-7fdd89cc6b-xr5dl +74s Normal Evicting podmigrationjob/e54863dc-b651-47e3-9ffd-08b6b4ff64d5 Pod "default/stress-demo-7fdd89cc6b-xr5dl" evicted from node "cn-beijing.10.0.2.53" by the reason "node is overutilized, cpu usage(56.13%)>threshold(50.00%)" +41s Normal EvictComplete podmigrationjob/e54863dc-b651-47e3-9ffd-08b6b4ff64d5 Pod "default/stress-demo-7fdd89cc6b-xr5dl" has been evicted +7m12s Normal Scheduled pod/stress-demo-7fdd89cc6b-xr5dl Successfully assigned default/stress-demo-7fdd89cc6b-xr5dl to cn-beijing.10.0.2.53 +7m12s Normal AllocIPSucceed pod/stress-demo-7fdd89cc6b-xr5dl Alloc IP 10.0.2.77/24 +7m12s Normal Pulling pod/stress-demo-7fdd89cc6b-xr5dl Pulling image "polinux/stress" +6m59s Normal Pulled pod/stress-demo-7fdd89cc6b-xr5dl Successfully pulled image "polinux/stress" in 12.685405843s +6m59s Normal Created pod/stress-demo-7fdd89cc6b-xr5dl Created container stress +6m59s Normal Started pod/stress-demo-7fdd89cc6b-xr5dl Started container stress +74s Normal Descheduled pod/stress-demo-7fdd89cc6b-xr5dl Pod evicted from node "cn-beijing.10.0.2.53" by the reason "node is overutilized, cpu usage(56.13%)>threshold(50.00%)" +73s Normal Killing pod/stress-demo-7fdd89cc6b-xr5dl Stopping container stress +7m13s Normal SuccessfulCreate replicaset/stress-demo-7fdd89cc6b Created pod: stress-demo-7fdd89cc6b-xr5dl +``` diff --git a/versioned_docs/version-v1.4/user-manuals/load-aware-scheduling.md b/versioned_docs/version-v1.4/user-manuals/load-aware-scheduling.md new file mode 100644 index 000000000..ef59b32d1 --- /dev/null +++ b/versioned_docs/version-v1.4/user-manuals/load-aware-scheduling.md @@ -0,0 +1,324 @@ +# Load Aware Scheduling + +Load Aware Scheduling is an ability of koord-scheduler for balancing pods scheduling based on the real-time load of each node. + +## Introduction + +Load balancing is a common issue in resource scheduling. Under-utilized nodes bring much resource waste to the +cluster, while over-utilized nodes are likely to cause performance degradation. 
Neither of them is suitable for +efficient resource management. + +The native Kubernetes scheduler schedules pods based on the requests and the allocation of nodes, considering neither +the real-time load nor the estimated usage. When we want to balance the pod scheduling on each node and make the loads +even with the native scheduler, we need to set precise resource requirements for the applications. Moreover, since +Koordinator enables resource overcommitment to achieve better resource efficiency, we need a mechanism to reduce the +probability of performance degradation and avoid over-utilization. + +Koord-scheduler can retrieve node metrics by cooperating with the koordlet. It provides the ability to balance the +scheduling of both the online (LSE/LSR/LS) pods and offline (BE) pods based on node utilization. + +![image](/img/load-aware-scheduling-arch.svg) + +For more information, please see [Design: Load Aware Scheduling](/docs/designs/load-aware-scheduling). + +## Setup + +### Prerequisite + +- Kubernetes >= 1.18 +- Koordinator >= 0.4 + +### Installation + +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to [Installation](/docs/installation). + +### Global Configuration via plugin args + +Load-aware scheduling is *Enabled* by default. You can use it without any modification on the koord-scheduler config. + +For users who need deep insight, please configure the rules of load-aware scheduling by modifying the ConfigMap +`koord-scheduler-config` in the helm chart. + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: koord-scheduler-config + ... +data: + koord-scheduler-config: | + apiVersion: kubescheduler.config.k8s.io/v1beta2 + kind: KubeSchedulerConfiguration + profiles: + - schedulerName: koord-scheduler + plugins: + # enable the LoadAwareScheduling plugin + filter: + enabled: + - name: LoadAwareScheduling + ... + score: + enabled: + - name: LoadAwareScheduling + weight: 1 + ... + reserve: + enabled: + - name: LoadAwareScheduling + ... + pluginConfig: + # configure the thresholds and weights for the plugin + - name: LoadAwareScheduling + args: + apiVersion: kubescheduler.config.k8s.io/v1beta2 + kind: LoadAwareSchedulingArgs + # whether to filter nodes where koordlet fails to update NodeMetric + filterExpiredNodeMetrics: true + # the expiration threshold seconds when using NodeMetric + nodeMetricExpirationSeconds: 300 + # weights of resources + resourceWeights: + cpu: 1 + memory: 1 + # thresholds (%) of resource utilization + usageThresholds: + cpu: 75 + memory: 85 + # thresholds (%) of resource utilization of Prod Pods + prodUsageThresholds: + cpu: 55 + memory: 65 + # enable score according Prod usage + scoreAccordingProdUsage: true + # the factor (%) for estimating resource usage + estimatedScalingFactors: + cpu: 80 + memory: 70 + # enable resource utilization filtering and scoring based on percentile statistics + aggregated: + usageThresholds: + cpu: 65 + memory: 75 + usageAggregationType: "p99" + scoreAggregationType: "p99" +``` + +The koord-scheduler takes this ConfigMap as [scheduler Configuration](https://kubernetes.io/docs/reference/scheduling/config/). +New configurations will take effect after the koord-scheduler restarts. + +| Field | Description | Version | +|-------|-------------| --------| +| filterExpiredNodeMetrics | filterExpiredNodeMetrics indicates whether to filter nodes where koordlet fails to update NodeMetric. Enabled by default but in Helm chart, it's disabled. 
| >= v0.4.0 | +| nodeMetricExpirationSeconds | nodeMetricExpirationSeconds indicates the NodeMetric expiration in seconds. When NodeMetrics expired, the node is considered abnormal. Default is 180 seconds.| >= v0.4.0 | +| resourceWeights | resourceWeights indicates the weights of resources. The weights of CPU and Memory are both 1 by default.| >= v0.4.0 | +| usageThresholds | usageThresholds indicates the resource utilization threshold of the whole machine. The default for CPU is 65%, and the default for memory is 95%.| >= v0.4.0 | +| estimatedScalingFactors | estimatedScalingFactors indicates the factor when estimating resource usage. The default value of CPU is 85%, and the default value of Memory is 70%. | >= v0.4.0 | +| prodUsageThresholds| prodUsageThresholds indicates the resource utilization threshold of Prod Pods compared to the whole machine. Not enabled by default. | >= v1.1.0 | +| scoreAccordingProdUsage | scoreAccordingProdUsage controls whether to score according to the utilization of Prod Pod. | >= v1.1.0 | +| aggregated | aggregated supports resource utilization filtering and scoring based on percentile statistics. | >= v1.1.0 | + +The fields of Aggregated: + +| Field | Description | Version | +|-------|-------------| --------| +| usageThresholds | usageThresholds indicates the resource utilization threshold of the machine based on percentile statistics. | >= v1.1.0| +| usageAggregationType | usageAggregationType indicates the percentile type of the machine's utilization when filtering. Currently supports `avg`, `p50`, `p90`, `p95` and `p99`. | >= v1.1.0 | +| usageAggregatedDuration | usageAggregatedDuration indicates the statistical period of the percentile of the machine's utilization when filtering. When this field is not set, the scheduler uses the data of the maximum period in NodeMetrics by default. | >= v1.1.0| +| scoreAggregationType | scoreAggregationType indicates the percentile type of the machine's utilization when scoring. Currently supports `avg`, `p50`, `p90`, `p95` and `p99`. | >= v1.1.0 +| scoreAggregatedDuration | scoreAggregatedDuration indicates the statistical period of the percentile of Prod Pod's utilization when scoring. When this field is not set, the scheduler uses the data of the maximum period in NodeMetrics by default. | >= v1.1.0 | + +### Configure filter thresholds by Node + +The configuration through the plugin can be used as the default global configuration of the cluster, and users can also set the load thresholds of the node dimension by appending annotation to the node. When the annotation exists on the node, it will be filtered according to the parameters specified by the annotation. + +The annotation is defined as follows: + +```go +const ( + AnnotationCustomUsageThresholds = "scheduling.koordinator.sh/usage-thresholds" +) + +// CustomUsageThresholds supports user-defined node resource utilization thresholds. +type CustomUsageThresholds struct { + // UsageThresholds indicates the resource utilization threshold of the whole machine. 
+ UsageThresholds map[corev1.ResourceName]int64 `json:"usageThresholds,omitempty"` + // ProdUsageThresholds indicates the resource utilization threshold of Prod Pods compared to the whole machine + ProdUsageThresholds map[corev1.ResourceName]int64 `json:"prodUsageThresholds,omitempty"` + // AggregatedUsage supports resource utilization filtering and scoring based on percentile statistics + AggregatedUsage *CustomAggregatedUsage `json:"aggregatedUsage,omitempty"` +} + +type CustomAggregatedUsage struct { + // UsageThresholds indicates the resource utilization threshold of the machine based on percentile statistics + UsageThresholds map[corev1.ResourceName]int64 `json:"usageThresholds,omitempty"` + // UsageAggregationType indicates the percentile type of the machine's utilization when filtering + UsageAggregationType slov1alpha1.AggregationType `json:"usageAggregationType,omitempty"` + // UsageAggregatedDuration indicates the statistical period of the percentile of the machine's utilization when filtering + UsageAggregatedDuration *metav1.Duration `json:"usageAggregatedDuration,omitempty"` +} +``` + +## Use Load Aware Scheduling + +### Load-aware scheduling by the whole machine load + +The example cluster in this article has three 4-core 16GiB nodes. + +1. Deploy a `stress` pod with the YAML file below. + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: stress-demo + namespace: default + labels: + app: stress-demo +spec: + replicas: 1 + selector: + matchLabels: + app: stress-demo + template: + metadata: + name: stress-demo + labels: + app: stress-demo + spec: + containers: + - args: + - '--vm' + - '2' + - '--vm-bytes' + - '1600M' + - '-c' + - '2' + - '--vm-hang' + - '2' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: + limits: + cpu: '2' + memory: 4Gi + requests: + cpu: '2' + memory: 4Gi + restartPolicy: Always + schedulerName: koord-scheduler # use the koord-scheduler +``` + +```bash +$ kubectl create -f stress-demo.yaml +deployment.apps/stress-demo created +``` + +2. Watch the pod status util it becomes running. + +```bash +$ kubectl get pod -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +stress-demo-7fdd89cc6b-gcnzn 1/1 Running 0 82s 10.0.3.114 cn-beijing.10.0.3.112 +``` + +The pod `stress-demo-7fdd89cc6b-gcnzn` is scheduled on `cn-beijing.10.0.3.112`. + +3. Check the load of each node. + +```bash +$ kubectl top node +NAME CPU(cores) CPU% MEMORY(bytes) MEMORY% +cn-beijing.10.0.3.110 92m 2% 1158Mi 9% +cn-beijing.10.0.3.111 77m 1% 1162Mi 9% +cn-beijing.10.0.3.112 2105m 53% 3594Mi 28% +``` + +In above order, `cn-beijing.10.0.3.112` has the highest load, while `cn-beijing.10.0.3.111` has the lowest load. + +4. Deploy an `nginx` deployment with the YAML file below. + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-with-loadaware + labels: + app: nginx +spec: + replicas: 6 + selector: + matchLabels: + app: nginx + template: + metadata: + name: nginx + labels: + app: nginx + spec: + schedulerName: koord-scheduler # use the koord-scheduler + containers: + - name: nginx + image: nginx + resources: + limits: + cpu: 500m + requests: + cpu: 500m +``` + +```bash +$ kubectl create -f nginx-with-loadaware.yaml +deployment/nginx-with-loadawre created +``` + +5. Check the scheduling results of `nginx` pods. 
+ +```bash +$ kubectl get pods | grep nginx +nginx-with-loadaware-5646666d56-224jp 1/1 Running 0 18s 10.0.3.118 cn-beijing.10.0.3.110 +nginx-with-loadaware-5646666d56-7glt9 1/1 Running 0 18s 10.0.3.115 cn-beijing.10.0.3.110 +nginx-with-loadaware-5646666d56-kcdvr 1/1 Running 0 18s 10.0.3.119 cn-beijing.10.0.3.110 +nginx-with-loadaware-5646666d56-qzw4j 1/1 Running 0 18s 10.0.3.113 cn-beijing.10.0.3.111 +nginx-with-loadaware-5646666d56-sbgv9 1/1 Running 0 18s 10.0.3.120 cn-beijing.10.0.3.111 +nginx-with-loadaware-5646666d56-z79dn 1/1 Running 0 18s 10.0.3.116 cn-beijing.10.0.3.111 +``` + +Now we can see `nginx` pods get scheduled on the nodes other than `cn-beijing.10.0.3.112` (node with the highest load). + + +### Load-aware scheduling by the Prod Pods + +If there are many BestEffort Pods scheduled in one Node, the latency-sensitive Pods may fail to schedule cause the load of node has reached the limit of usage. In Koordinator v1.1.0, load-aware scheduling is optimized for this scenario. For latency-sensitive(LSE/LSR/LS) Pods, priority is given to scheduling to the nodes with low total utilization of the Prod Pods. BestEffort(BE) Pods are scheduled according to the utilization level of the whole node. + +Enable relevant optimizations by setting the following parameters: + +| Field | Description | Version | +|-------|-------------| --------| +| prodUsageThresholds| prodUsageThresholds indicates the resource utilization threshold of Prod Pods compared to the whole machine. Not enabled by default. | >= v1.1.0 | +| scoreAccordingProdUsage | scoreAccordingProdUsage controls whether to score according to the utilization of Prod Pod. | >= v1.1.0 | + +### Load-aware scheduling based on percentile statistics + +In Koordinator v1.0 and previous versions, load-aware scheduling is filtered and scored according to the average utilization data reported by koordlet. But the average value hides a lot of information, so in Koordinator v1.1, koordlet adds utilization aggregation data based on percentile statistics. Corresponding adaptations have also been made on the scheduler side. + +Enable relevant optimizations by setting the following parameters: + +| Field | Description | Version | +|-------|-------------| --------| +| aggregated | aggregated supports resource utilization filtering and scoring based on percentile statistics. | >= v1.1.0 | + +The fields of Aggregated: + +| Field | Description | Version | +|-------|-------------| --------| +| usageThresholds | usageThresholds indicates the resource utilization threshold of the machine based on percentile statistics. | >= v1.1.0| +| usageAggregationType | usageAggregationType indicates the percentile type of the machine's utilization when filtering. Currently supports `avg`, `p50`, `p90`, `p95` and `p99`. | >= v1.1.0 | +| usageAggregatedDuration | usageAggregatedDuration indicates the statistical period of the percentile of the machine's utilization when filtering. When this field is not set, the scheduler uses the data of the maximum period in NodeMetrics by default. | >= v1.1.0| +| scoreAggregationType | scoreAggregationType indicates the percentile type of the machine's utilization when scoring. Currently supports `avg`, `p50`, `p90`, `p95` and `p99`. | >= v1.1.0 +| scoreAggregatedDuration | scoreAggregatedDuration indicates the statistical period of the percentile of Prod Pod's utilization when scoring. When this field is not set, the scheduler uses the data of the maximum period in NodeMetrics by default. 
| >= v1.1.0 | + +The `aggregated` and the `usageThresholds` parameter are mutually exclusive. When both are configured, the `aggregated` will be used. +In addition, Pod type awareness is not currently supported. \ No newline at end of file diff --git a/versioned_docs/version-v1.4/user-manuals/memory-evict.md b/versioned_docs/version-v1.4/user-manuals/memory-evict.md new file mode 100644 index 000000000..972d9cf6c --- /dev/null +++ b/versioned_docs/version-v1.4/user-manuals/memory-evict.md @@ -0,0 +1,132 @@ +# Eviction Strategy base on Memory Usage + +## Introduction + +Koordinator supports the dynamic overcommitment from idle resources on node to low-priority +Pods as Batch priority. In co-location scenarios, the actual memory resource usage of +nodes is constantly changing. For incompressible resources such as memory, high resource +usage of node may cause OOM, which results in the high-priority Pod got killed. Koordinator +provides an eviction strategy based on the memory usage node. `Koordlet` will continuously +detect the memory usage of node (Total-Available) in second-level granularity. +When the resource memory usage of node is high, it will evict low-priority BE Pods to +ensure the QoS of high-priority pods until the memory usage of node reduces below to the +threshold (evictThreshold). During the eviction process, Pods with lower priority(Pod.Spec.Priority) +will be selected first, and if the priority is the same, Pods which consume more memory will be +evicted first. + + +![image](/img/memory-evict.svg) + +### Prerequisite +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to +[Installation](/docs/installation). + +| Component | Version Requirement | +| --- | ------- | +| Kubernetes | ≥v1.18 | +| koordinator | ≥v0.3.0 | + +The eviction strategy is provided by `Koordlet`, which is disabled by default in feature-gate. +Please make sure the `BEMemoryEvict=true` field has been added in the `-feature-gates` arguments of `Koordlet` +as the [example](https://github.com/koordinator-sh/charts/blob/main/versions/v1.2.0/templates/koordlet.yaml#L36)。 + +## Use Eviction Strategy base on Memory Usage + +1. Create a configmap.yaml file based on the following ConfigMap content: + ```yaml + #ConfigMap slo-controller-config example。 + apiVersion: v1 + kind: ConfigMap + metadata: + name: slo-controller-config # name should be set as the configuration of koord-manager, e.g. ack-slo-config + namespace: koordinator-system # namespace should be set as the configuration of installation, e.g. kube-system + data: + # enable the eviction strategy base on Memory Usage + resource-threshold-config: | + { + "clusterStrategy": { + "enable": true, + "memoryEvictThresholdPercent": 70 + } + } + ``` + + | Configuration item | Parameter | Valid values | Description | + | :-------------- | :------ | :-------- | :----------------------------------------------------------- | + | `enable` | Boolean | true; false | true:enable the eviction.; false(default):disable the eviction. | + | `memoryEvictThresholdPercent` | Int | 0~100 | eviction threshold percent of node memory usage, default is 70. | + +2. Check whether a ConfigMap named `slo-controller-config` exists in the `koordinator-system` namespace. + + - If a ConfigMap named `slo-controller-config` exists, we commend that you run the kubectl patch command to update the ConfigMap. This avoids changing other settings in the ConfigMap. 
+ + ```bash + kubectl patch cm -n koordinator-system slo-controller-config --patch "$(cat configmap.yaml)" + ``` + + - If no ConfigMap named `slo-controller-config` exists, run the kubectl patch command to create a ConfigMap named ack-slo-config: + + ```bash + kubectl apply -f configmap.yaml + ``` + +3. Create a file named be-pod-demo.yaml based on the following YAML content: + + ```yaml + apiVersion: v1 + kind: Pod + metadata: + name: be-pod-demo + labels: + koordinator.sh/qosClass: 'BE' # set Pod QoS as BE + spec: + containers: + - args: + - '-c' + - '1' + - '--vm' + - '1' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + restartPolicy: Always + schedulerName: default-scheduler + # priorityClassName is required when ColocationProfile enabled (default). + priorityClassName: koord-batch + ``` + +4. Run the following command to deploy the be-pod-demo pod in the cluster: + + ```bash + kubectl apply -f be-pod-demo.yaml + ``` + +5. Run the following command to check the be-pod-demo pod in Running state: + + ```bash + $ kubectl get pod be-pod-demo + NAME READY STATUS RESTARTS AGE + be-pod-demo 1/1 Running 0 7s + ``` +6. Run the following command through [stress tool](https://linux.die.net/man/1/stress) +make sure the memory usage of node is above the threshold config, and the argument `--vm-bytes` +means the process will consume 10GB memory, this should be adjusted according to the node capacity. + + ```bash + $ stress --cpu 1 --vm 1 --vm-bytes 10G --vm-keep + ``` + +7. Check the running state of be-pod-demo, then you can find the be-pod-demo pod is not exist, +and the eviction information can be found in events. + + ```bash + $ kubectl get pod be-pod-demo + Error from server (NotFound): pods "be-pod-demo" not found + + $ kubectl get event + LAST SEEN TYPE REASON OBJECT MESSAGE + 46s Normal Killing pod/be-pod-demo Stopping container stress + 48s Warning evictPodSuccess ${your-pod-object} evict Pod:be-pod-demo, reason: EvictPodByNodeMemoryUsage, message: killAndEvictBEPods for node(${your-node-id}), need to release memory: 8077889699 + ``` \ No newline at end of file diff --git a/versioned_docs/version-v1.4/user-manuals/memory-qos.md b/versioned_docs/version-v1.4/user-manuals/memory-qos.md new file mode 100644 index 000000000..66f5e60f9 --- /dev/null +++ b/versioned_docs/version-v1.4/user-manuals/memory-qos.md @@ -0,0 +1,355 @@ +# Memory QoS + +## Introduction + +The Koordlet provides the *Memory Quality of Service* (QoS) feature for containers. You can use this feature to +optimize the performance of memory-sensitive applications while ensuring fair memory scheduling among containers. This +topic describes how to enable the memory QoS feature for containers. + +### Background + +The following memory limits apply to containers: + +- The memory limit of the container. If the amount of memory that a container uses, including the page cache, is about + to reach the memory limit of the container, the memory reclaim mechanism of the OS kernel is triggered. As a result, + the application in the container may not be able to request or release memory resources as normal. +- The memory limit of the node. If the memory limit of a container is greater than the memory request of the container, + the container can overcommit memory resources. In this case, the available memory on the node may become insufficient. + This causes the OS kernel to reclaim memory from containers. As a result, the performance of your application is + downgraded. 
In extreme cases, the node cannot run as normal. + +To improve the performance of applications and the stability of nodes, Koordinator provides the memory QoS feature for +containers. We recommend that you use Anolis OS as the node OS. For other OS, we will try our best to adapt, and users +can still enable it without side effects. After you enable the memory QoS feature for a container, Koordlet +automatically configures the memory control group (memcg) based on the configuration of the container. This helps you +optimize the performance of memory-sensitive applications while ensuring fair memory scheduling on the node. + +Memory QoS provides the following optimizations to improve the memory utilization of pods: + +- When the memory used by a pod is about to reach the memory limit of the pod, the memcg performs asynchronous reclaim for a specific amount of memory. This prevents the reclaim of all the memory that the pod uses and therefore minimizes the adverse impact on the application performance caused by direct memory reclaim. +- Memory reclaim is performed in a fairer manner among pods. When the available memory on a node becomes insufficient, memory reclaim is first performed on pods that use more memory than their memory requests. This ensures sufficient memory on the node when a pod applies for a large amount of memory. +- If the BestEffort pods on a node use more memory than their memory requests, the system prioritizes the memory requirements of Guaranteed pods and Burstable pods over the memory requirements of BestEffort pods. + +![image](/img/memory-qos.png) + +## Setup + +### Prerequisite + +- Kubernetes >= 1.18 +- Koordinator >= 0.3 + +### Installation + +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to +[Installation](/docs/installation). + +### Configurations + +Koordlet has already enabled Memory QoS feature (`-feature-gates=AllAlpha=true`). +If not, please enable it manually by updating the feature gate in the koordlet daemonset. + +> NOTE: Memory QoS is controlled by the `CgroupReconcile` feature-gate. + +```yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: koordlet +spec: + selector: + matchLabels: + koord-app: koordlet + template: + metadata: + labels: + koord-app: koordlet + spec: + containers: + - command: + - /koordlet + args: + - -CgroupRootDir=/host-cgroup/ + - -feature-gates=XXXX,CgroupReconcile=true # enable CPU Burst feature + ... +``` + +## Use Memory QoS + +When you enable memory QoS for the containers in a pod, the memcg is automatically configured based on the specified +ratios and pod parameters. To enable memory QoS for the containers in a pod, perform the following steps. + +### Use an annotation to enable Memory QoS for the pod + +Add the following annotations to enable memory QoS for the containers in a pod: + +```yaml +annotations: + # To enable memory QoS for the containers in a pod, set the value to auto. + koordinator.sh/memoryQOS: '{"policy": "auto"}' + # To disable memory QoS for the containers in a pod, set the value to none. 
+ #koordinator.sh/memoryQOS: '{"policy": "none"}' +``` + +### Use a ConfigMap to enable memory QoS for all the containers in a cluster + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: koordinator-system +data: + resource-qos-config: |- + { + "clusterStrategy": { + "lsClass": { + "memoryQOS": { + "enable": true + } + }, + "beClass": { + "memoryQOS": { + "enable": true + } + } + } + } +``` + +### (Optional) Advanced Settings + +The following table describes the advanced parameters that you can use to configure fine-grained memory QoS +configurations at the pod level and cluster level. + +| Parameter | Data type | Valid value | Description | +| ------------------- | ----------- | --------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| enable | Boolean |
  • true<br/>  • false<br/> |   • true: enables memory QoS for all the containers in a cluster. The default memory QoS settings for the QoS class of the containers are used.<br/>  • false: disables memory QoS for all the containers in a cluster. The memory QoS settings are restored to the original settings for the QoS class of the containers.<br/> | +| policy | String |   • auto<br/>  • default<br/>  • none<br/> |   • auto: enables memory QoS for the containers in the pod and uses the recommended memory QoS settings. The recommended memory QoS settings are prioritized over the cluster-wide memory QoS settings.<br/>  • default: specifies that the pod inherits the cluster-wide memory QoS settings.<br/>  • none: disables memory QoS for the pod. The relevant memory QoS settings are restored to the original settings. The original settings are prioritized over the cluster-wide memory QoS settings.
| +| minLimitPercent | Int | 0~100 | Unit: %. Default value:`0`. The default value indicates that this parameter is disabled. This parameter specifies the unreclaimable proportion of the memory request of a pod. The amount of unreclaimable memory is calculated based on the following formula: `Value of memory.min = Memory request × Value of minLimitPercent/100`. This parameter is suitable for scenarios where applications are sensitive to the page cache. You can use this parameter to cache files to optimize read and write performance. For example, if you specify Memory `Request=100MiB` and `minLimitPercent=100` for a container, `the value of memory.min is 104857600`. | +| lowLimitPercent | Int | 0~100 | Unit: %. Default value:`0`. The default value indicates that this parameter is disabled. This parameter specifies the relatively unreclaimable proportion of the memory request of a pod. The amount of relatively unreclaimable memory is calculated based on the following formula: `Value of memory.low = Memory request × Value of lowLimitPercent/100`. For example, if you specify `Memory Request=100MiB` and `lowLimitPercent=100` for a container, `the value of memory.low is 104857600`. | +| throttlingPercent | Int | 0~100 | Unit: %. Default value:`0`. The default value indicates that this parameter is disabled. This parameter specifies the memory throttling threshold for the ratio of the memory usage of a container to the memory limit of the container. The memory throttling threshold for memory usage is calculated based on the following formula: `Value of memory.high = Memory limit × Value of throttlingPercent/100`. If the memory usage of a container exceeds the memory throttling threshold, the memory used by the container will be reclaimed. This parameter is suitable for container memory overcommitment scenarios. You can use this parameter to cgroups from triggering OOM. For example, if you specify `Memory Limit=100MiB` and `throttlingPercent=80` for a container, `the value of memory.high is 83886080`, which is equal to 80 MiB. | +| wmarkRatio | Int | 0~100 | Unit: %. Default value:`95`. A value of `0` indicates that this parameter is disabled. This parameter specifies the threshold of the usage of the memory limit or the value of `memory.high` that triggers asynchronous memory reclaim. If `throttlingPercent` is disabled, the asynchronous memory reclaim threshold for memory usage is calculated based on the following formula: `Value of memory.wmark_high = Memory limit × wmarkRatio/100`. If `throttlingPercent` is enabled, the asynchronous memory reclaim threshold for memory usage is calculated based on the following formula: `Value of memory.wmark_high = Value of memory.high × wmarkRatio/100`. If the usage of the memory limit or the value of memory.high exceeds the threshold, the memcg backend asynchronous reclaim feature is triggered. For example, if you specify `Memory Limit=100MiB`for a container, the memory throttling setting is`memory.high=83886080`, the reclaim ratio setting is `memory.wmark_ratio=95`, and the reclaim threshold setting is `memory.wmark_high=79691776`. | +| wmarkMinAdj | Int | -25~50 | Unit: %. The default value is `-25` for the `LS`/ `LSR` QoS class and `50` for the `BE` QoS class. A value of 0 indicates that this parameter is disabled. This parameter specifies the adjustment to the global minimum watermark for a container. A negative value decreases the global minimum watermark and therefore postpones memory reclaim for the container. 
A positive value increases the global minimum watermark and therefore antedates memory reclaim for the container. For example, if you create a pod whose QoS class is LS, the default setting of this parameter is `memory.wmark_min_adj=-25`, which indicates that the minimum watermark is decreased by 25% for the containers in the pod. | + +### Example + +0. The testing environment is shown below: + +- Kubernetes: 1.20 +- Nodes: + - Stress Node: an ECS instance (8 vCPU, 32GB RAM) for performing stress tests. + - Tested Node: an ECS instance (8 vCPU, 32GB RAM) runs the workload and serves. + +1. Create a file named redis-demo.yaml with the following YAML template: + +```yaml +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: redis-demo-config +data: + redis-config: | + appendonly yes + appendfsync no +--- +apiVersion: v1 +kind: Pod +metadata: + name: redis-demo + labels: + name: redis-demo + annotations: + koordinator.sh/memoryQOS: '{"policy": "auto"}' # Add this annotation to enable memory QoS + koordinator.sh/qosClass: 'LS' # Set the QoS class of the Redis pod to LS +spec: + containers: + - name: redis + image: redis:5.0.4 + command: + - redis-server + - "/redis-master/redis.conf" + env: + - name: MASTER + value: "true" + ports: + - containerPort: 6379 + resources: + limits: + cpu: "2" + memory: "6Gi" + requests: + cpu: "2" + memory: "2Gi" + volumeMounts: + - mountPath: /redis-master-data + name: data + - mountPath: /redis-master + name: config + volumes: + - name: data + emptyDir: {} + - name: config + configMap: + name: redis-demo-config + items: + - key: redis-config + path: redis.conf + nodeName: # Set nodeName to the name of the tested node +--- +apiVersion: v1 +kind: Service +metadata: + name: redis-demo +spec: + ports: + - name: redis-port + port: 6379 + protocol: TCP + targetPort: 6379 + selector: + name: redis-demo + type: ClusterIP +``` + +2. Run the following command to deploy Redis Server as the test application. + +You can access the redis-demo Service from within the cluster. + +```bash +kubectl apply -f redis-demo.yaml +``` + +3. Simulate the scenario of memory overcommitment. + +Use the Stress tool to increase the load on memory and trigger memory reclaim. The sum of the memory limits of all pods +on the node exceeds the physical memory of the node. + + a. Create a file named stress-demo.yaml with the following YAML template: + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: stress-demo + labels: + name: stress-demo + annotations: + koordinator.sh/memoryQOS: '{"policy": "auto"}' # Add this annotation to enable memory QoS + koordinator.sh/qosClass: 'BE' # Set the QoS class of the Stress pod to BE +spec: + containers: + - args: + - '--vm' + - '2' + - '--vm-bytes' + - 11G + - '-c' + - '2' + - '--vm-hang' + - '2' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + restartPolicy: Always + nodeName: # Set nodeName to the name of the tested node, which is the node on which the Redis pod is deployed +``` + + b. Run the following command to deploy stress-demo: + +```bash +kubectl apply -f stress-demo.yaml +``` + +4. Run the following command to query the global minimum watermark of the node: + +> Note In memory overcommitment scenarios, if the global minimum watermark of the node is set to a low value, OOM +> killers may be triggered for all pods on the node even before memory reclaim is performed. Therefore, we recommend +> that you set the global minimum watermark to a high value. 
In this example, the global minimum watermark is set +> to 4,000,000 KB for the tested node that has 32 GiB of memory. + +```bash +cat /proc/sys/vm/min_free_kbytes +``` + +Expected output: + +```bash +4000000 +``` + +5. Use the following YAML template to deploy the memtier-benchmark tool to send requests to the tested node: + +```yaml +apiVersion: v1 +kind: Pod +metadata: + labels: + name: memtier-demo + name: memtier-demo +spec: + containers: + - command: + - memtier_benchmark + - '-s' + - 'redis-demo' + - '--data-size' + - '200000' + - "--ratio" + - "1:4" + image: 'redislabs/memtier_benchmark:1.3.0' + name: memtier + restartPolicy: Never + nodeName: # Set nodeName to the name of the stress node that is used to send requests. +``` + +6. Run the following command to query the test results from memtier-benchmark: + +```bash +kubectl logs -f memtier-demo +``` + +7. Use the following YAML template to disable memory QoS for the Redis pod and Stress pod. Then, perform stress tests +again and compare the results. + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: redis-demo + labels: + name: redis-demo + annotations: + koordinator.sh/memoryQOS: '{"policy": "none"}' # Disable memory QoS. + koordinator.sh/qosClass: 'LS' +spec: + ... + +--- +apiVersion: v1 +kind: Pod +metadata: + name: stress-demo + labels: + name: stress-demo + annotations: + koordinator.sh/memoryQOS: '{"policy": "none"}' # Disable memory QoS. + koordinator.sh/qosClass: 'BE' +``` + +8. Check the results of Memory QoS enabled and disabled. + +- Disabled: Set the memory QoS policy of the pod to `none`. +- Enabled: Set the memory QoS policy of the pod to `auto` (the recommended parameters of memory QoS are used). + +| Metric | Disabled | Enabled | +| ----------------- | ------------- | ------------- | +| Latency-avg | 51.32 ms | 47.25 ms | +| Throughput-avg | 149.0 MB/s | 161.9 MB/s | + +The table shows that the latency of the Redis pod is reduced by 7.9% and the throughput of the Redis pod is increased +by 8.7% after memory QoS is enabled. This indicates that the memory QoS feature can optimize the performance of +applications in memory overcommitment scenarios. diff --git a/versioned_docs/version-v1.4/user-manuals/performance-collector.md b/versioned_docs/version-v1.4/user-manuals/performance-collector.md new file mode 100644 index 000000000..4cc3a3971 --- /dev/null +++ b/versioned_docs/version-v1.4/user-manuals/performance-collector.md @@ -0,0 +1,184 @@ +# Performance Collector + +## Motivation + +In real production environment, the runtime state of a node is a "chaotic system", and application interference caused by resource contention cannot be absolutely avoided. Koordinator is building interference detection and optimization capabilities. By extracting metrics of application running status, real-time analysis and detection are carried out, and more targeted strategies are adopted for target applications and interference sources after interference is discovered. +Koordinator implements a series of `Performance Collectors` to collect low-level metrics highly correlated with application running status on one node, and expose them through `Prometheus` to provide support for interference detection capabilities and cluster application scheduling. + +## Setup + +### Prerequisite + +- Kubernetes >= 1.18 + +- Koordinator >= 1.0 + +- To use CPI Collector, make sure your node machine supports Cycles and Instructions Kernel PMU(Performance Monitoring Unit) events. + + > Use belowing command to check. 
+ + ```shell + $ perf list + List of pre-defined events (to be used in -e): + + branch-instructions OR branches [Hardware event] + branch-misses [Hardware event] + bus-cycles [Hardware event] + ... + + cpu-cycles OR cpu/cpu-cycles/ [Kernel PMU event] + ... + instructions OR cpu/instructions/ [Kernel PMU event] + ``` + +- To use PSI Collector, your Anolis OS needs to enable PSI feature. Please refer to this [document](https://www.alibabacloud.com/help/en/elastic-compute-service/latest/enable-the-psi-feature-for-cgroup-v1). + +### Installation + +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to [Installation](https://koordinator.sh/zh-Hans/docs/installation). + +### Feature-gates +Performance Collector is managed by several feature-gates. Koordinator currently supports following collectors: + +- `CPICollector`: manages CPI collector. CPI: Cycles Per Instruction. +- `PSICollector`:manages PSI collector. PSI: Pressure Stall Information. + +### Configuration + +Performance Collectors are _Disabled_ currently by default. To enable them, just edit Koordlet's `feature-gates` args. + +```shell +kubectl edit ds koordlet -n koordinator-system +``` + +```shell +spec: + ... + spec: + containers: + - args: + ... + # modify here + # - -feature-gates=BECPUEvict=true,BEMemoryEvict=true,CgroupReconcile=true,Accelerators=true + - -feature-gates=BECPUEvict=true,BEMemoryEvict=true,CgroupReconcile=true,Accelerators=true,CPICollector=true,PSICollector=true + ... +``` + +## Overhead + +Koordinator Performance Collector is an important tool for interference detection, and one of its core goals is to collect relevant indicators at low cost. The following shows the system overhead introduced by Koordinator before and after enabling Performance Collector. Users can refer to this test result to use the Performance Collector feature. + +### Testing Context + +- Alibaba Cloud Container Service for Kubernetes (ACK) Managed Kubernetes Cluster: + - Kubernetes version:1.24.6-aliyun.1 + - Container Runtime:containerd 1.5.13 + - Node Spec:ecs.ebmg6.26xlarge,104 vCPU 384 GiB, OS: Alibaba Cloud Linux 2.1903 +- Node pressure: + - Test Pod image:nginx:1.14.2 + - Number of Pods on single Node:100 test Pod + 50 system Pod + - Number of Containers on single Node:150 + - Node CPU usage: about 25%, use lookbusy-1.4 to generate on each CPU +- Others: + - 100 nginx Pods are managed by a Linux cronjob, which is deleted every five minutes. The Deployment controller rebuild these Pods in time. + - CPI Collector runs in a window of 10 seconds every 60 seconds. + - PSI Collector runs every 10 seconds. + - The test lasts for 1 hour before and after Performance Collector is enabled. + +### Conclusion + +#### Case 1:Overhead comparison of Koordlet container before and after enabling Performance Collector + +Performance Collector runs on the Koordlet component of Koordinator, and the cost of the component is compared as follows: + +- No significant increase in overall overhead: + + | Metrics | Disable | Enable | + | :--------------: | :------: | :-------------------: | + | RSS Memory usage | 341MiB | 366MiB | + | CPU usage | 0.5 core | 0.6 core | + | Network I/O | - | no significant change | +- Possible cause of the overhead: + - The new CPI data table of per Container dimension, and new PSI data table of both per Container and per Pod dimension. + - The consumption caused by the collector's goroutine per cgroup. + - The consumption caused by Prometheus Gauge. 
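+
+The gauge cost mentioned above grows with the number of containers, since one time series is exported per container and per `cpi_field`. As a rough illustration only (a hand-written sketch with placeholder values in angle brackets, not koordlet's actual implementation, assuming nothing beyond the standard `client_golang` library), the `koordlet_container_cpi` metric shown later on this page maps onto a GaugeVec like this:
+
+```go
+package main
+
+import (
+	"net/http"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
+)
+
+// containerCPI mirrors the koordlet_container_cpi metric exposed on the koordlet
+// metrics port; the label names follow the /metrics output shown in the Example
+// section below. This is an illustrative sketch, not the real koordlet collector.
+var containerCPI = prometheus.NewGaugeVec(
+	prometheus.GaugeOpts{
+		Name: "koordlet_container_cpi",
+		Help: "Container cpi collected by koordlet",
+	},
+	[]string{"container_id", "container_name", "cpi_field", "node", "pod_name", "pod_namespace", "pod_uid"},
+)
+
+func main() {
+	prometheus.MustRegister(containerCPI)
+
+	// After each collection window, one sample is set per container and per
+	// cpi_field ("cycles" and "instructions"), which is where the per-series
+	// memory overhead comes from. The values below are placeholders.
+	containerCPI.WithLabelValues(
+		"containerd://<container-id>", "koordlet", "cycles",
+		"<node-name>", "<koordlet-pod-name>", "koordinator-system", "<pod-uid>",
+	).Set(2.2e9)
+
+	// Expose the registry over HTTP (koordlet serves metrics on port 9316 by default).
+	http.Handle("/metrics", promhttp.Handler())
+	_ = http.ListenAndServe(":9316", nil)
+}
+```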
+ +#### Case 2:Overhead comparison of Node before and after enabling Performance Collector + +Performance Collector uses the perf_event_open(2) system call, and its impact on the node is compared as follows: + +- No significant increase in overall overhead: + + | Metrics | Disable | Enable | + | :-------------------: | :-----: | :----: | + | System Mode CPU usage | 0.94% | 0.96% | + | User Mode CPU usage | 24.51% | 25.19% | + +- Possible cause of the overhead: + - Usage of perf_event_open(2) + - Enabling of PSI feature on OS + +## Example + +1. To enable Performance Collector: +```shell +helm install koordinator https://... --set featureGates="CPICollector=true,PSICollector=true" +``` + +2. Use belowing flags to config collectors' time window or collect intervals: + + | Flag | Default | Definition | + | :-----------------------------: | :-----: | :--------------------------------: | + | -cpi-collector-interval-seconds | 60 | Collect cpi interval by seconds | + | -collect-cpi-timewindow-seconds | 10 | Collect cpi time window by seconds | + | -psi-collector-interval-seconds | 10 | Collect psi interval by seconds | +3. We can see reported metric values at Prometheus port(9316 as default), the API path is `/metrics`, e.g., CPI is shown as two records of *cycles* and *instructions*: +```shell +$ curl http://localhost:9316/metrics + +# HELP koordlet_container_cpi Container cpi collected by koordlet +# TYPE koordlet_container_cpi gauge +koordlet_container_cpi{container_id="containerd://498de02ddd3ad7c901b3c80f96c57db5b3ed9a817dbfab9d16b18be7e7d2d047",container_name="koordlet",cpi_field="cycles",node="your-node-name",pod_name="koordlet-x8g2j",pod_namespace="koordinator-system",pod_uid="3440fb9c-423b-48e9-8850-06a6c50f633d"} 2.228107503e+09 +koordlet_container_cpi{container_id="containerd://498de02ddd3ad7c901b3c80f96c57db5b3ed9a817dbfab9d16b18be7e7d2d047",container_name="koordlet",cpi_field="instructions",node="your-node-name",pod_name="koordlet-x8g2j",pod_namespace="koordinator-system",pod_uid="3440fb9c-423b-48e9-8850-06a6c50f633d"} 4.1456092e+09 +``` + +4. Notice that we also provide ServiceMonitor for Koordlet to evict those metrics: + + ```yaml + apiVersion: v1 + kind: Service + metadata: + labels: + koord-app: koordlet + name: koordlet + namespace: koordinator-system + spec: + clusterIP: None + ports: + - name: koordlet-service + port: 9316 + targetPort: 9316 + selector: + koord-app: koordlet + --- + apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + labels: + koord-app: koordlet + name: koordlet + namespace: koordinator-system + spec: + endpoints: + - interval: 30s + port: koordlet-service + scheme: http + jobLabel: koord-app + selector: + matchLabels: + koord-app: koordlet + ``` + + You can find it in Promethues Targets: + + ![koordlet-servicemonitor-prometheus](/img/koordlet-servicemonitor-prometheus.png) diff --git a/versioned_docs/version-v1.4/user-manuals/pod-migration-job.md b/versioned_docs/version-v1.4/user-manuals/pod-migration-job.md new file mode 100644 index 000000000..e1a708902 --- /dev/null +++ b/versioned_docs/version-v1.4/user-manuals/pod-migration-job.md @@ -0,0 +1,256 @@ +# PodMigrationJob + +Koordinator defines a CRD-based Pod migration API called `PodMigrationJob`, through which the descheduler or other automatic fault recovery components can evict or delete Pods more safely. 
+ +## Introduction + +Migrating Pods is an important capability that many components (such as deschedulers) rely on, and can be used to optimize scheduling or help resolve workload runtime quality issues. We believe that pod migration is a complex process, involving steps such as auditing, resource allocation, and application startup, and is mixed with application upgrading, scaling scenarios, and resource operation and maintenance operations by cluster administrators. Therefore, how to manage the stability risk of this process to ensure that the application does not fail due to the migration of Pods is a very critical issue that must be resolved. + +Based on the final state-oriented migration capability of the PodMigrationJob CRD, we can track the status of each process during the migration process, perceive scenarios such as application upgrades and scaling to ensure the stability of the workload. + +## Setup + +### Prerequisite + +- Kubernetes >= 1.18 +- Koordinator >= 0.6 + +### Installation + +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to [Installation](/docs/installation). + +### Configurations + +PodMigrationJob is *Enabled* by default. You can use it without any modification on the koord-descheduler config. + +## Use PodMigrationJob + +### Quick Start + +1. Create a Deployment `pod-demo` with the YAML file below. + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: pod-demo + namespace: default +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: pod-demo + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + creationTimestamp: null + labels: + app: pod-demo + name: stress + spec: + containers: + - args: + - -c + - "1" + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: + limits: + cpu: "2" + memory: 4Gi + requests: + cpu: 200m + memory: 400Mi + restartPolicy: Always + schedulerName: koord-scheduler +``` + +```bash +$ kubectl create -f pod-demo.yaml +deployment.apps/pod-demo created +``` + +2. Check the scheduled result of the pod `pod-demo-0`. + +```bash +$ kubectl get pod -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +pod-demo-5f9b977566-c7lvk 1/1 Running 0 41s 10.17.0.9 node-0 +``` + +`pod-demo-5f9b977566-c7lvk` is scheduled on the node `node-0`. + +3. Create a `PodMigrationJob` with the YAML file below to migrate `pod-demo-0`. + +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: PodMigrationJob +metadata: + name: migrationjob-demo +spec: + paused: false + ttl: 5m + mode: ReservationFirst + podRef: + namespace: default + name: pod-demo-5f9b977566-c7lvk +status: + phase: Pending +``` + +```bash +$ kubectl create -f migrationjob-demo.yaml +podmigrationjob.scheduling.koordinator.sh/migrationjob-demo created +``` + +5. Query migration status + +```bash +$ kubectl get podmigrationjob migrationjob-demo +NAME PHASE STATUS AGE NODE RESERVATION PODNAMESPACE POD NEWPOD TTL +migrationjob-demo Succeed Complete 37s node-1 d56659ab-ba16-47a2-821d-22d6ba49258e default pod-demo-5f9b977566-c7lvk pod-demo-5f9b977566-nxjdf 5m0s +``` + +From the above results, it can be observed that: +- **PHASE** is `Succeed`, **STATUS** is `Complete`, indicating that the migration is successful. +- **NODE** `node-1` indicates the node where the new Pod is scheduled after the migration. 
+- **RESERVATION** `d56659ab-ba16-47a2-821d-22d6ba49258e` is the Reservation created during the migration. The PodMigrationJob Controller tries to create the reserved resources for the Reservation before starting to evict the Pod. Only after the reservation succeeds is the eviction initiated, which guarantees that the new Pod has resources available once the old Pod is evicted.
+- **PODNAMESPACE** `default` represents the namespace where the migrated Pod is located.
+- **POD** `pod-demo-5f9b977566-c7lvk` represents the Pod to be migrated.
+- **NEWPOD** `pod-demo-5f9b977566-nxjdf` is the newly created Pod after migration.
+- **TTL** indicates the TTL period of the current Job.
+
+6. Query migration events
+
+The PodMigrationJob Controller creates Events for the important steps in the migration process to help users diagnose migration problems.
+
+```bash
+$ kubectl describe podmigrationjob migrationjob-demo
+...
+Events:
+  Type    Reason                Age    From               Message
+  ----    ------                ----   ----               -------
+  Normal  ReservationCreated    8m33s  koord-descheduler  Successfully create Reservation "d56659ab-ba16-47a2-821d-22d6ba49258e"
+  Normal  ReservationScheduled  8m33s  koord-descheduler  Assigned Reservation "d56659ab-ba16-47a2-821d-22d6ba49258e" to node "node-1"
+  Normal  Evicting              8m33s  koord-descheduler  Try to evict Pod "default/pod-demo-5f9b977566-c7lvk"
+  Normal  EvictComplete         8m     koord-descheduler  Pod "default/pod-demo-5f9b977566-c7lvk" has been evicted
+  Normal  Complete              8m     koord-descheduler  Bind Pod "default/pod-demo-5f9b977566-nxjdf" in Reservation "d56659ab-ba16-47a2-821d-22d6ba49258e"
+```
+
+### Advanced Configurations
+
+> The latest API can be found in [`pod_migration_job_types.go`](https://github.com/koordinator-sh/koordinator/blob/main/apis/scheduling/v1alpha1/pod_migration_job_types.go).
+
+### Example: Manually confirm whether the migration is allowed
+
+Eviction and migration operations bring risks to stability, so you may want to manually check and confirm that nothing is wrong before the migration is actually initiated.
+
+Therefore, when creating a PodMigrationJob, set `spec.paused` to `true`, and set `spec.paused` to `false` after manually confirming that execution is allowed.
+If you refuse to execute, you can update `status.phase=Failed` to terminate the execution of the PodMigrationJob immediately, or wait for the PodMigrationJob to expire automatically.
+
+```yaml
+apiVersion: scheduling.koordinator.sh/v1alpha1
+kind: PodMigrationJob
+metadata:
+  name: migrationjob-demo
+spec:
+  # paused indicates whether the PodMigrationJob should work or not.
+  paused: true
+  # ttl controls the PodMigrationJob timeout duration.
+  ttl: 5m
+  mode: ReservationFirst
+  podRef:
+    namespace: default
+    name: pod-demo-5f9b977566-c7lvk
+status:
+  phase: Pending
+```
+
+### Example: Just want to evict Pods, no need to reserve resources
+
+PodMigrationJob provides two migration modes:
+- `EvictDirectly`: evict the Pod directly without reserving resources.
+- `ReservationFirst`: reserve resources first to ensure that resources can be allocated before initiating the eviction.
+ +If just want to evict Pods, just set `spec.mode` to `EvictDirectly` + +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: PodMigrationJob +metadata: + name: migrationjob-demo +spec: + paused: false + ttl: 5m + mode: EvictDirectly + podRef: + namespace: default + name: pod-demo-5f9b977566-c7lvk +status: + phase: Pending +``` + +### Example: Use reserved resources when migrating + +In some scenarios, resources are reserved first, and then a PodMigrationJob is created after success. +The arbitration mechanism provided by the PodMigrationJob Controller (BTW: will be implemented in v0.7) is reused to ensure workload stability. + +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: PodMigrationJob +metadata: + name: migrationjob-demo +spec: + paused: false + ttl: 5m + mode: ReservationFirst + podRef: + namespace: default + name: pod-demo-5f9b977566-c7lvk + reservationOptions: + # the reservation-0 created before creating PodMigrationJob + reservationRef: + name: reservation-0 +status: + phase: Pending +``` + +### Example: Evicting Pods Gracefully + +PodMigrationJob supports graceful eviction of pods. + +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: PodMigrationJob +metadata: + name: migrationjob-demo +spec: + paused: true + ttl: 5m + mode: ReservationFirst + podRef: + namespace: default + name: pod-demo-5f9b977566-c7lvk + deleteOptions: + # The duration in seconds before the object should be deleted. Value must be non-negative integer. + # The value zero indicates delete immediately. If this value is nil, the default grace period for the + # specified type will be used. + # Defaults to a per object value if not specified. zero means delete immediately. + gracePeriodSeconds: 60 +status: + phase: Pending +``` + + +### Known Issues +- [Arbitration mechanism](https://github.com/koordinator-sh/koordinator/blob/main/docs/proposals/scheduling/20220701-pod-migration-job.md#filter-podmigrationjob) is not currently supported. The v0.6 version only implements the migration capability based on resource reservation +- [Basic Migration API](https://github.com/koordinator-sh/koordinator/blob/main/docs/proposals/scheduling/20220701-pod-migration-job.md#basic-migration-api) is not currenty supported \ No newline at end of file diff --git a/versioned_docs/version-v1.4/user-manuals/resource-reservation.md b/versioned_docs/version-v1.4/user-manuals/resource-reservation.md new file mode 100644 index 000000000..88cd888ab --- /dev/null +++ b/versioned_docs/version-v1.4/user-manuals/resource-reservation.md @@ -0,0 +1,449 @@ +# Resource Reservation + +Resource Reservation is an ability of koord-scheduler for reserving node resources for specific pods or workloads. + +## Introduction + +Pods are fundamental for allocating node resources in Kubernetes, which bind resource requirements with business logic. +However, we may allocate resources for specific pods or workloads not created yet in the scenarios below: + +1. Preemption: Existing preemption does not guarantee that only preempting pods can allocate preempted resources. We expect that the scheduler can "lock" resources preventing from allocation of other pods even if they have the same or higher priorities. +2. De-scheduling: For the descheduler, it is better to ensure sufficient resources before pods get rescheduled. Otherwise, rescheduled pods may not be runnable anymore and make the belonging application disrupted. +3. 
Horizontal scaling: To achieve more deterministic horizontal scaling, we expect to allocate node resources for the replicas to scale. +4. Resource Pre-allocation: We may want to pre-allocate node resources for future resource demands even if the resources are not currently allocatable. + +To enhance the resource scheduling of Kubernetes, koord-scheduler provides a scheduling API named `Reservation`, which allows us to reserve node resources for specified pods or workloads even if they haven't get created yet. + +![image](/img/resource-reservation.svg) + +For more information, please see [Design: Resource Reservation](../designs/resource-reservation). + +## Setup + +### Prerequisite + +- Kubernetes >= 1.18 +- Koordinator >= 0.6 + +### Installation + +Please make sure Koordinator components are correctly installed in your cluster. If not, please refer to [Installation](/docs/installation). + +### Configurations + +Resource Reservation is *Enabled* by default. You can use it without any modification on the koord-scheduler config. + +## Use Resource Reservation + +### Quick Start + +1. Deploy a reservation `reservation-demo` with the YAML file below. + +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: Reservation +metadata: + name: reservation-demo +spec: + template: # set resource requirements + namespace: default + spec: + containers: + - args: + - '-c' + - '1' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: # reserve 500m cpu and 800Mi memory + requests: + cpu: 500m + memory: 800Mi + schedulerName: koord-scheduler # use koord-scheduler + owners: # set the owner specifications + - object: # owner pods whose name is `default/pod-demo-0` + name: pod-demo-0 + namespace: default + ttl: 1h # set the TTL, the reservation will get expired 1 hour later +``` + +```bash +$ kubectl create -f reservation-demo.yaml +reservation.scheduling.koordinator.sh/reservation-demo created +``` + +2. Watch the reservation status util it becomes available. + +```bash +$ kubectl get reservation reservation-demo -o wide +NAME PHASE AGE NODE TTL EXPIRES +reservation-demo Available 88s node-0 1h +``` + +3. Deploy a pod `pod-demo-0` with the YAML file below. + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: pod-demo-0 # match the owner spec of `reservation-demo` +spec: + containers: + - args: + - '-c' + - '1' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: + limits: + cpu: '1' + memory: 1Gi + requests: + cpu: 200m + memory: 400Mi + restartPolicy: Always + schedulerName: koord-scheduler # use koord-scheduler +``` + +```bash +$ kubectl create -f pod-demo-0.yaml +pod/pod-demo-0 created +``` + +4. Check the scheduled result of the pod `pod-demo-0`. + +```bash +$ kubectl get pod pod-demo-0 -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +pod-demo-0 1/1 Running 0 32s 10.17.0.123 node-0 +``` + +`pod-demo-0` is scheduled at the same node with `reservation-demo`. + +5. Check the status of the reservation `reservation-demo`. + +```bash +$ kubectl get reservation reservation-demo -oyaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: Reservation +metadata: + name: reservation-demo + creationTimestamp: "YYYY-MM-DDT05:24:58Z" + uid: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + ... 
+spec: + owners: + - object: + name: pod-demo-0 + namespace: default + template: + spec: + containers: + - args: + - -c + - "1" + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: + requests: + cpu: 500m + memory: 800Mi + schedulerName: koord-scheduler + ttl: 1h +status: + allocatable: # total reserved + cpu: 500m + memory: 800Mi + allocated: # current allocated + cpu: 200m + memory: 400Mi + conditions: + - lastProbeTime: "YYYY-MM-DDT05:24:58Z" + lastTransitionTime: "YYYY-MM-DDT05:24:58Z" + reason: Scheduled + status: "True" + type: Scheduled + - lastProbeTime: "YYYY-MM-DDT05:24:58Z" + lastTransitionTime: "YYYY-MM-DDT05:24:58Z" + reason: Available + status: "True" + type: Ready + currentOwners: + - name: pod-demo-0 + namespace: default + uid: yyyyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy + nodeName: node-0 + phase: Available +``` + +Now we can see the reservation `reservation-demo` has reserved 500m cpu and 800Mi memory, and the pod `pod-demo-0` +allocates 200m cpu and 400Mi memory from the reserved resources. + +6. Cleanup the reservation `reservation-demo`. + +```bash +$ kubectl delete reservation reservation-demo +reservation.scheduling.koordinator.sh "reservation-demo" deleted +$ kubectl get pod pod-demo-0 +NAME READY STATUS RESTARTS AGE +pod-demo-0 1/1 Running 0 110s +``` + +After the reservation deleted, the pod `pod-demo-0` is still running. + +### Advanced Configurations + +> The latest API can be found in [`reservation_types`](https://github.com/koordinator-sh/koordinator/blob/main/apis/scheduling/v1alpha1/reservation_types.go). + +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: Reservation +metadata: + name: reservation-demo +spec: + # pod template (required): Reserve resources and play pod/node affinities according to the template. + # The resource requirements of the pod indicates the resource requirements of the reservation + template: + namespace: default + spec: + containers: + - args: + - '-c' + - '1' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: + requests: + cpu: 500m + memory: 800Mi + # scheduler name (required): use koord-scheduler to schedule the reservation + schedulerName: koord-scheduler + # owner spec (required): Specify what kinds of pods can allocate resources of this reservation. + # Currently support three kinds of owner specifications: + # - object: specify the name, namespace, uid of the owner pods + # - controller: specify the owner reference of the owner pods, e.g. name, namespace(extended by koordinator), uid, kind + # - labelSelector: specify the matching labels are matching expressions of the owner pods + owners: + - object: + name: pod-demo-0 + namespace: default + - labelSelector: + matchLabels: + app: app-demo + # TTL (optional): Time-To-Live duration of the reservation. The reservation will get expired after the TTL period. + # If not set, use `24h` as default. + ttl: 1h + # Expires (optional): Expired timestamp when the reservation is expected to expire. + # If both `expires` and `ttl` are set, `expires` is checked first. + expires: "YYYY-MM-DDTHH:MM:SSZ" +``` + + + +### Example: Reserve on Specified Node, with Multiple Owners + +1. Check the resources allocatable of each node. + +```bash +$ kubectl get node -o custom-columns=NAME:.metadata.name,CPU:.status.allocatable.cpu,MEMORY:.status.allocatable.memory +NAME CPU MEMORY +node-0 7800m 28625036Ki +node-1 7800m 28629692Ki +... 
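+# Then check how much of node-1's allocatable capacity is already requested: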
+$ kubectl describe node node-1 | grep -A 8 "Allocated resources" + Allocated resources: + (Total limits may be over 100 percent, i.e., overcommitted.) + Resource Requests Limits + -------- -------- ------ + cpu 780m (10%) 7722m (99%) + memory 1216Mi (4%) 14044Mi (50%) + ephemeral-storage 0 (0%) 0 (0%) + hugepages-1Gi 0 (0%) 0 (0%) + hugepages-2Mi 0 (0%) 0 (0%) +``` + +As above, the node `node-1` has about 7.0 cpu and 26Gi memory unallocated. + +2. Deploy a reservation `reservation-demo-big` with the YAML file below. + +```yaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: Reservation +metadata: + name: reservation-demo-big +spec: + template: + namespace: default + spec: + containers: + - args: + - '-c' + - '1' + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: # reserve 6 cpu and 20Gi memory + requests: + cpu: 6 + memory: 20Gi + nodeName: node-1 # set the expected node name to schedule at + schedulerName: koord-scheduler + owners: # set multiple owners + - object: # owner pods whose name is `default/pod-demo-0` + name: pod-demo-1 + namespace: default + - labelSelector: # owner pods who have label `app=app-demo` can allocate the reserved resources + matchLabels: + app: app-demo + ttl: 1h +``` + +```bash +$ kubectl create -f reservation-demo-big.yaml +reservation.scheduling.koordinator.sh/reservation-demo-big created +``` + +3. Watch the reservation status util it becomes available. + +```bash +$ kubectl get reservation reservation-demo-big -o wide +NAME PHASE AGE NODE TTL EXPIRES +reservation-demo-big Available 37s node-1 1h +``` + +The reservation `reservation-demo-big` is scheduled at the node `node-1`, which matches the nodeName set in pod template. + +4. Deploy a deployment `app-demo` with the YAML file below. + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: app-demo +spec: + replicas: 2 + selector: + matchLabels: + app: app-demo + template: + metadata: + name: stress + labels: + app: app-demo # match the owner spec of `reservation-demo-big` + spec: + schedulerName: koord-scheduler # use koord-scheduler + containers: + - name: stress + image: polinux/stress + args: + - '-c' + - '1' + command: + - stress + resources: + requests: + cpu: 2 + memory: 10Gi + limits: + cpu: 4 + memory: 20Gi +``` + +```bash +$ kubectl create -f app-demo.yaml +deployment.apps/app-demo created +``` + +5. Check the scheduled result of the pods of deployment `app-demo`. + +```bash +k get pod -l app=app-demo -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +app-demo-798c66db46-ctnbr 1/1 Running 0 2m 10.17.0.124 node-1 +app-demo-798c66db46-pzphc 1/1 Running 0 2m 10.17.0.125 node-1 +``` + +Pods of deployment `app-demo` are scheduled at the same node with `reservation-demo-big`. + +6. Check the status of the reservation `reservation-demo-big`. + +```bash +$ kubectl get reservation reservation-demo-big -oyaml +apiVersion: scheduling.koordinator.sh/v1alpha1 +kind: Reservation +metadata: + name: reservation-demo-big + creationTimestamp: "YYYY-MM-DDT06:28:16Z" + uid: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + ... 
+spec: + owners: + - object: + name: pod-demo-0 + namespace: default + template: + spec: + containers: + - args: + - -c + - "1" + command: + - stress + image: polinux/stress + imagePullPolicy: Always + name: stress + resources: + requests: + cpu: 500m + memory: 800Mi + schedulerName: koord-scheduler + ttl: 1h +status: + allocatable: + cpu: 6 + memory: 20Gi + allocated: + cpu: 4 + memory: 20Gi + conditions: + - lastProbeTime: "YYYY-MM-DDT06:28:17Z" + lastTransitionTime: "YYYY-MM-DDT06:28:17Z" + reason: Scheduled + status: "True" + type: Scheduled + - lastProbeTime: "YYYY-MM-DDT06:28:17Z" + lastTransitionTime: "YYYY-MM-DDT06:28:17Z" + reason: Available + status: "True" + type: Ready + currentOwners: + - name: app-demo-798c66db46-ctnbr + namespace: default + uid: yyyyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy + - name: app-demo-798c66db46-pzphc + namespace: default + uid: zzzzzzzz-zzzz-zzzz-zzzzzzzzzzzz + nodeName: node-1 + phase: Available +``` + +Now we can see the reservation `reservation-demo-big` has reserved 6 cpu and 20Gi memory, and the pods of deployment +`app-demo` allocates 4 cpu and 20Gi memory from the reserved resources. +The allocation for reserved resources does not increase the requested of node resources, otherwise the total request of +`node-1` would exceed the node allocatable. +Moreover, a reservation can be allocated by multiple owners when there are enough reserved resources unallocated. diff --git a/versioned_docs/version-v1.4/user-manuals/slo-config.md b/versioned_docs/version-v1.4/user-manuals/slo-config.md new file mode 100644 index 000000000..2141699b4 --- /dev/null +++ b/versioned_docs/version-v1.4/user-manuals/slo-config.md @@ -0,0 +1,432 @@ +# SLO Configuration + +## Introduction + +Koordinator uses a ConfigMap to manage the SLO configurations. The ConfigMap is used by the slo-controller, whose name +and namespace can be specified via the startup arguments of the koord-manager +(`koordinator-system/slo-controller-config` by default). It has the following keys respectively: + +- `colocation-config`: The configuration for colocation. For example, whether to enable the colocated batch resources or not, the colocated watermark. +- `resource-threshold-config`: The configuration for threshold-based suppression or eviction. For example, the threshold for cpu suppression, the threshold for memory eviction. +- `resource-qos-config`: The configuration for QoS-based features. For example, Group Identity for BE pods, Memory QoS for LS pods, Last-Level-Cache partitioning for BE pods. +- `cpu-burst-config`: The configuration for the CPU Burst feature. For example, maximum burst ratio of the pod. +- `system-config`: The configuration for system-level settings. For example, the global minimum memory free factor (`min_free_kbytes`). + +### Configuration Levels + +Each config is defined in a pattern of both the cluster-level and the node-level. + +e.g. + +```go +type ColocationCfg struct { + ColocationStrategy `json:",inline"` + NodeConfigs []NodeColocationCfg `json:"nodeConfigs,omitempty"` +} + +type ResourceQOSCfg struct { + ClusterStrategy *slov1alpha1.ResourceQOSStrategy `json:"clusterStrategy,omitempty"` + NodeStrategies []NodeResourceQOSStrategy `json:"nodeStrategies,omitempty"` +} +``` + +The cluster-level config is for setting the global configurations, while the node-level is for users to adjust the +configurations of some nodes, especially for a gray-scale deployment. 
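+
+For instance, a node-level entry only needs to carry the fields it overrides for the nodes matched by its `nodeSelector`. The snippet below is just an illustration of this pattern (the node-pool label, entry name, and threshold values are placeholders); the full set of supported keys is listed in the template later on this page:
+
+```yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: slo-controller-config
+  namespace: koordinator-system
+data:
+  # Cluster-level strategy plus one node-level override for a gray-scale node pool.
+  resource-threshold-config: |
+    {
+      "clusterStrategy": {
+        "enable": true,
+        "cpuSuppressThresholdPercent": 65
+      },
+      "nodeStrategies": [
+        {
+          "name": "gray-scale-example",
+          "nodeSelector": {
+            "matchLabels": {
+              "node-pool": "gray-scale"
+            }
+          },
+          "cpuSuppressThresholdPercent": 60
+        }
+      ]
+    }
+```
+
+Fields that are not overridden in a matching node-level entry are expected to fall back to the cluster-level strategy.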
+ +Please note that most configured fields have default values inside the components (koordlet, koord-manager), so editing +the changed parameters is usually enough. + +### NodeSLO + +The data in SLO config is parsed by the koord-manager. The koord-manager checks if the config data is legal, and then +updates the parsed configs into NodeSLO objects for every node. If the parsing fails, the koord-manager records events +to the ConfigMap object to warn the unmarshal errors. For the agent component koordlet, it watches the specifications +in the NodeSLO and reconciles the node QoS features. + +```yaml +apiVersion: slo.koordinator.sh/v1alpha1 +kind: NodeSLO +metadata: + name: test-node +spec: + cpuBurstStrategy: {} + extensions: {} + resourceQOSStrategy: {} + systemStrategy: {} + # parsed from the `resource-threshold-config` data + resourceUsedThresholdWithBE: + cpuSuppressPolicy: cpuset + cpuSuppressThresholdPercent: 65 + enable: true + memoryEvictThresholdPercent: 70 + +``` + +## Configurations + +> Referred version: Koordinator v1.2 + +The SLO Config template is as follows: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: slo-controller-config + namespace: koordinator-system +data: + # colocation-config is the configuration for colocation. + # Related features: Dynamic resource over-commitment, Load-aware scheduling, Load-aware descheduling. + # - enable: whether to enable the colocation. If false, the reclaimed resources of the node allocatable (e.g. `kubernetes.io/batch-cpu`) will be removed. + # - metricAggregateDurationSeconds: the aggregated duration of node metrics reporting. + # - metricReportIntervalSeconds: the reporting interval of the node metrics. + # - metricAggregatePolicy: policies of reporting node metrics in different durations. + # - cpuReclaimThresholdPercent: the reclaim threshold for calculating the reclaimed cpu resource. Basically, the reclaimed resource cannot reclaim the unused resources which are exceeding the threshold. + # - memoryReclaimThresholdPercent: the reclaim threshold for calculating the reclaimed memory resource. Basically, the reclaimed resource cannot reclaim the unused resources which are exceeding the threshold. + # - memoryCalculatePolicy: the policy for calculating the reclaimable memory resource. If set to `request`, only unallocated memory resource of high-priority pods are reclaimable, and no allocated memory can be reclaimed. + # - degradeTimeMinutes: the threshold duration to degrade the colocation for which the node metrics has not been updated. + # - updateTimeThresholdSeconds: the threshold duration to force updating the reclaimed resources with the latest calculated result. + # - resourceDiffThreshold: the threshold to update the reclaimed resources than which the calculated reclaimed resources is different from the current. + # - nodeConfigs: the node-level configurations which matches the nodes via the node selector and overrides the cluster configuration. 
+ colocation-config: | + { + "enable": false, + "metricAggregateDurationSeconds": 300, + "metricReportIntervalSeconds": 60, + "metricAggregatePolicy": { + "durations": [ + "5m", + "10m", + "15m" + ] + }, + "cpuReclaimThresholdPercent": 60, + "memoryReclaimThresholdPercent": 65, + "memoryCalculatePolicy": "usage", + "degradeTimeMinutes": 15, + "updateTimeThresholdSeconds": 300, + "resourceDiffThreshold": 0.1, + "nodeConfigs": [ + { + "name": "anolis", + "nodeSelector": { + "matchLabels": { + "kubernetes.io/kernel": "anolis" + } + }, + "updateTimeThresholdSeconds": 360, + "resourceDiffThreshold": 0.2 + } + ] + } + # The configuration for threshold-based strategies. + # Related features: BECPUSuppress, BEMemoryEvict, BECPUEvict. + # - clusterStrategy: the cluster-level configuration. + # - nodeStrategies: the node-level configurations which matches the nodes via the node selector and overrides the cluster configuration. + # - enable: whether to enable the threshold-based strategies or not. If false, all threshold-based strategies are disabled. If set to true, CPU Suppress and Memory Evict are enabled by default. + # - cpuSuppressThresholdPercent: the node cpu utilization threshold to suppress BE pods' usage. + # - cpuSuppressPolicy: the policy of cpu suppression. If set to `cpuset`, the BE pods' `cpuset.cpus` will be reconciled when suppression. If set to `cfsQuota`, the BE pods' `cpu.cfs_quota_us` will be reconciled. + # - memoryEvictThresholdPercent: the node memory utilization threshold to evict BE pods. + # - memoryEvictLowerPercent: the node memory utilization threshold to stop the memory eviction. By default, `lowerPercent = thresholdPercent - 2`. + # - cpuEvictBESatisfactionLowerPercent: the cpu satisfaction threshold to start the cpu eviction (also require to meet the BE util threshold). + # - cpuEvictBEUsageThresholdPercent: the BE utilization (BEUsage / BERealLimit) threshold to start the cpu eviction (also require to meet the cpu satisfaction threshold). + # - cpuEvictBESatisfactionUpperPercent: the cpu satisfaction threshold to stop the cpu eviction. + # - cpuEvictTimeWindowSeconds: the time window of the cpu metrics for the cpu eviction. + resource-threshold-config: | + { + "clusterStrategy": { + "enable": false, + "cpuSuppressThresholdPercent": 65, + "cpuSuppressPolicy": "cpuset", + "memoryEvictThresholdPercent": 70, + "memoryEvictLowerPercent": 65, + "cpuEvictBESatisfactionUpperPercent": 90, + "cpuEvictBESatisfactionLowerPercent": 60, + "cpuEvictBEUsageThresholdPercent": 90 + }, + "nodeStrategies": [ + { + "name": "anolis", + "nodeSelector": { + "matchLabels": { + "kubernetes.io/kernel": "anolis" + } + }, + "cpuEvictBEUsageThresholdPercent": 80 + } + ] + } + # The configuration for QoS-based features. + # Related features: CPUQoS (GroupIdentity), MemoryQoS (CgroupReconcile), ResctrlQoS. + # - clusterStrategy: the cluster-level configuration. + # - nodeStrategies: the node-level configurations which matches the nodes via the node selector and overrides the cluster configuration. + # - lsrClass/lsClass/beClass: the configuration for pods of QoS LSR/LS/BE respectively. + # - cpuQOS: the configuration of CPU QoS. + # - enable: whether to enable CPU QoS. If set to `false`, the related cgroup configs will be reset to the system default. + # - groupIdentity: the priority level of the Group Identity ([-1, 2]). `2` means the highest priority, while `-1` means the lowest priority. Anolis OS required. + # - memoryQOS: the configuration of Memory QoS. 
+ # - enable: whether to enable Memory QoS. If set to `false`, the related cgroup configs will be reset to the system default. + # - minLimitPercent: the scale percentage for setting the `memory.min` based on the container's request. It enables the memory protection from the Linux memory reclaim. + # - lowLimitPercent: the scale percentage for setting the `memory.low` based on the container's request. It enables the memory soft protection from the Linux memory reclaim. + # - throttlingPercent: the scale percentage for setting the `memory.high` based on the container's limit. It enables the memory throttling in cgroup level. + # - wmarkRatio: the ratio of container-level asynchronous memory reclaim based on the container's limit. Anolis OS required. + # - wmarkScalePermill: the per-mill of container memory to reclaim in once asynchronous memory reclaim. Anolis OS required. + # - wmarkMinAdj: the adjustment percentage of global memory min watermark. It affects the reclaim priority when the node memory free is quite a few. Anolis OS required. + # - resctrlQOS: the configuration of Resctrl (Intel RDT) QoS. + # - enable: whether to enable Resctrl QoS. + # - catRangeStartPercent: the starting percentage of the L3 Cache way partitioning. L3 CAT required. + # - catRangeEndPercent: the ending percentage of the L3 Cache way partitioning. L3 CAT required. + # - mbaPercent: the allocation percentage of the memory bandwidth. MBA required. + resource-qos-config: | + { + "clusterStrategy": { + "lsrClass": { + "cpuQOS": { + "enable": false, + "groupIdentity": 2 + }, + "memoryQOS": { + "enable": false, + "minLimitPercent": 0, + "lowLimitPercent": 0, + "throttlingPercent": 0, + "wmarkRatio": 95, + "wmarkScalePermill": 20, + "wmarkMinAdj": -25, + "priorityEnable": 0, + "priority": 0, + "oomKillGroup": 0 + }, + "resctrlQOS": { + "enable": false, + "catRangeStartPercent": 0, + "catRangeEndPercent": 100, + "mbaPercent": 100 + } + }, + "lsClass": { + "cpuQOS": { + "enable": false, + "groupIdentity": 2 + }, + "memoryQOS": { + "enable": false, + "minLimitPercent": 0, + "lowLimitPercent": 0, + "throttlingPercent": 0, + "wmarkRatio": 95, + "wmarkScalePermill": 20, + "wmarkMinAdj": -25, + "priorityEnable": 0, + "priority": 0, + "oomKillGroup": 0 + }, + "resctrlQOS": { + "enable": false, + "catRangeStartPercent": 0, + "catRangeEndPercent": 100, + "mbaPercent": 100 + } + }, + "beClass": { + "cpuQOS": { + "enable": false, + "groupIdentity": -1 + }, + "memoryQOS": { + "enable": false, + "minLimitPercent": 0, + "lowLimitPercent": 0, + "throttlingPercent": 0, + "wmarkRatio": 95, + "wmarkScalePermill": 20, + "wmarkMinAdj": 50, + "priorityEnable": 0, + "priority": 0, + "oomKillGroup": 0 + }, + "resctrlQOS": { + "enable": false, + "catRangeStartPercent": 0, + "catRangeEndPercent": 30, + "mbaPercent": 100 + } + } + }, + "nodeStrategies": [ + { + "name": "anolis", + "nodeSelector": { + "matchLabels": { + "kubernetes.io/kernel": "anolis" + } + }, + "beClass": { + "memoryQOS": { + "wmarkRatio": 90 + } + } + } + ] + } + # The configuration for the CPU Burst. + # Related features: CPUBurst. + # - clusterStrategy: the cluster-level configuration. + # - nodeStrategies: the node-level configurations which matches the nodes via the node selector and overrides the cluster configuration. + # - policy: the policy of CPU Burst. If set to `none`, the CPU Burst is disabled. If set to `auto`, the CPU Burst is fully enabled. If set to `cpuBurstOnly`, only the Linux CFS Burst feature is enabled. 
+ # - cpuBurstPercent: the percentage of Linux CFS Burst. It affects the value of `cpu.cfs_burst_us` of pod/container cgroups. It specifies the percentage to which the CPU limit can be increased by CPU Burst. + # - cfsQuotaBurstPercent: the percentage of cfs quota burst. It affects the scaled ratio of `cpu.cfs_quota_us` of pod/container cgroups. It specifies the maximum percentage to which the value of cfs_quota in the cgroup parameters can be increased. + # - cfsQuotaBurstPeriodSeconds: the maximum period of once cfs quota burst. It indicates that the time period in which the container can run with an increased CFS quota is unlimited. + # - sharePoolThresholdPercent: the threshold of share pool utilization. If the share pool utilization is too high, CPU Burst will be stopped and reset to avoid machine overload. + cpu-burst-config: | + { + "clusterStrategy": { + "policy": "none", + "cpuBurstPercent": 1000, + "cfsQuotaBurstPercent": 300, + "cfsQuotaBurstPeriodSeconds": -1, + "sharePoolThresholdPercent": 50 + }, + "nodeStrategies": [ + { + "name": "anolis", + "nodeSelector": { + "matchLabels": { + "kubernetes.io/kernel": "anolis" + } + }, + "policy": "cfsQuotaBurstOnly", + "cfsQuotaBurstPercent": 400 + } + ] + } + # The configuration for system-level settings. + # Related features: SystemConfig. + # - clusterStrategy: the cluster-level configuration. + # - nodeStrategies: the node-level configurations which matches the nodes via the node selector and overrides the cluster configuration. + # - minFreeKbytesFactor: the factor for calculating the global minimum memory free watermark `/proc/sys/vm/min_free_kbytes`. `min_free_kbytes = minFreeKbytesFactor * nodeTotalMemory / 10000`. + # - watermarkScaleFactor: the reclaim factor `/proc/sys/vm/watermark_scale_factor` in once global memory reclaim. + # - memcgReapBackGround: whether to enable the reaper for orphan memory cgroups. + system-config: |- + { + "clusterStrategy": { + "minFreeKbytesFactor": 100, + "watermarkScaleFactor": 150, + "memcgReapBackGround": 0 + } + "nodeStrategies": [ + { + "name": "anolis", + "nodeSelector": { + "matchLabels": { + "kubernetes.io/kernel": "anolis" + } + }, + "minFreeKbytesFactor": 100, + "watermarkScaleFactor": 150 + } + ] + } + # The configuration for host application settings. + # - name: name of the host application. + # - qos: QoS class of the application. + # - cgroupPath: cgroup path of the application, the directory equals to `${base}/${parentDir}/${relativePath}`. + # - cgroupPath.base: cgroup base dir of the application, the format is various across cgroup drivers. + # - cgroupPath.parentDir: cgroup parent path under base dir. By default it is "host-latency-sensitive/" for LS and "host-latency-sensitive/" for BE. + # - cgroupPath.relativePath: cgroup relative path under parent dir. + host-application-config: | + { + "applications": [ + { + "name": "nginx", + "qos": "LS", + "cgroupPath": { + "base": "CgroupRoot", + "parentDir": "host-latency-sensitive/", + "relativePath": "nginx/" + } + } + ] + } +``` + +For more information, please check the user manuals and designs of the related features. + +## Quick Start + +1. Check the current SLO configurations via the ConfigMap `koordinator-system/slo-controller-config`. 
+ +```bash +$ kubectl get configmap -n koordinator-system slo-controller-config -o yaml +apiVersion: v1 +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: koordinator + meta.helm.sh/release-namespace: default + labels: + app.kubernetes.io/managed-by: Helm + name: slo-controller-config + namespace: koordinator-system +data: + colocation-config: | + { + "enable": false, + "metricAggregateDurationSeconds": 300, + "metricReportIntervalSeconds": 60, + "cpuReclaimThresholdPercent": 60, + "memoryReclaimThresholdPercent": 65, + "memoryCalculatePolicy": "usage", + "degradeTimeMinutes": 15, + "updateTimeThresholdSeconds": 300, + "resourceDiffThreshold": 0.1 + } + resource-threshold-config: | + { + "clusterStrategy": { + "enable": false + } + } +``` + +2. Edit the ConfigMap `koordinator-system/slo-controller-config` to change the SLO config. + +```bash +$ kubectl edit configmap -n koordinator-system slo-controller-config +``` + +For example, the configmap is edited as follows: + +```yaml +data: + # ... + resource-threshold-config: | + { + "clusterStrategy": { + "enable": true, + "cpuSuppressThresholdPercent": 60, + "cpuSuppressPolicy": "cpuset", + "memoryEvictThresholdPercent": 60 + } + } +``` + +3. Verify if the NodeSLO is successfully dispatched. + +> NOTE: The default values will be omitted in the NodeSLO. + +```bash +$ kubectl get nodeslo.slo.koordinator.sh test-node -o yaml +apiVersion: slo.koordinator.sh/v1alpha1 +kind: NodeSLO +metadata: + name: test-node +spec: + # ... + extensions: {} + resourceUsedThresholdWithBE: + cpuSuppressPolicy: cpuset + cpuSuppressThresholdPercent: 60 + enable: true + memoryEvictThresholdPercent: 60 +``` diff --git a/versioned_sidebars/version-v1.4-sidebars.json b/versioned_sidebars/version-v1.4-sidebars.json new file mode 100644 index 000000000..271303a6f --- /dev/null +++ b/versioned_sidebars/version-v1.4-sidebars.json @@ -0,0 +1,93 @@ +{ + "docs": [ + { + "type": "category", + "label": "Getting Started", + "collapsed": false, + "items": [ + "introduction", + "installation" + ] + }, + { + "type": "category", + "label": "Architecture", + "collapsed": false, + "items": [ + "architecture/overview", + "architecture/resource-model", + "architecture/priority", + "architecture/qos" + ] + }, + { + "type": "category", + "label": "User Manuals", + "collapsed": true, + "items": [ + { + "Task Scheduling": [ + "user-manuals/gang-scheduling", + "user-manuals/capacity-scheduling" + ], + "Heterogeneous Resources Scheduling": [ + "user-manuals/fine-grained-device-scheduling" + ], + "Load-aware Scheduling": [ + "user-manuals/load-aware-scheduling", + "user-manuals/load-aware-descheduling" + ], + "Fine-grained Scheduling": [ + "user-manuals/fine-grained-cpu-orchestration", + "user-manuals/cpu-burst", + "user-manuals/cpu-qos", + "user-manuals/memory-qos" + ], + "Colocation": [ + "user-manuals/colocation-profile", + "user-manuals/cpu-suppress", + "user-manuals/cpu-evict", + "user-manuals/memory-evict", + "user-manuals/slo-config", + "user-manuals/host-application-qos", + "user-manuals/performance-collector" + ], + "Utils" : [ + "user-manuals/resource-reservation", + "user-manuals/pod-migration-job" + ] + } + ] + }, + { + "type": "category", + "label": "Design Details", + "collapsed": true, + "items": [ + "designs/koordlet-overview", + "designs/runtime-proxy", + "designs/nri-mode-resource-management", + "designs/node-prediction", + "designs/enhanced-scheduler-extension", + "designs/load-aware-scheduling", + "designs/fine-grained-cpu-orchestration", + 
"designs/resource-reservation", + "designs/pod-migration-job", + "designs/descheduler-framework", + "designs/fine-grained-device-scheduling", + "designs/gang-scheduling", + "designs/multi-hierarchy-elastic-quota-management" + ] + }, + { + "type": "category", + "label": "Best Practices", + "collapsed": true, + "items": [ + "best-practices/colocation-of-spark-jobs", + "best-practices/anolis_plugsched", + "best-practices/fine-grained-cpu-orchestration" + ] + } + ] +} diff --git a/versions.json b/versions.json index 15ed85c51..67d5ae9b2 100644 --- a/versions.json +++ b/versions.json @@ -1,4 +1,5 @@ [ + "v1.4", "v1.3", "v1.2", "v1.1",