Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improving Postgres CR Status with Additional Details #2714

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions manifests/minimal-postgres-manifest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ apiVersion: "acid.zalan.do/v1"
kind: postgresql
metadata:
name: acid-minimal-cluster
labels:
cluster-name: acid-minimal-cluster
spec:
teamId: "acid"
volume:
Expand Down
35 changes: 32 additions & 3 deletions manifests/postgresql.crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ spec:
storage: true
subresources:
status: {}
scale:
specReplicasPath: .spec.numberOfInstances
statusReplicasPath: .status.numberOfInstances
labelSelectorPath: .status.labelSelector
additionalPrinterColumns:
- name: Team
type: string
Expand Down Expand Up @@ -51,7 +55,7 @@ spec:
- name: Status
type: string
description: Current sync status of postgresql resource
jsonPath: .status.PostgresClusterStatus
jsonPath: .status.postgresClusterStatus
schema:
openAPIV3Schema:
type: object
Expand Down Expand Up @@ -677,5 +681,30 @@ spec:
type: integer
status:
type: object
additionalProperties:
type: string
properties:
postgresClusterStatus:
type: string
numberOfInstances:
format: int32
type: integer
labelSelector:
type: string
observedGeneration:
format: int64
type: integer
conditions:
type: array
items:
type: object
properties:
type:
type: string
status:
type: string
lastTransitionTime:
type: string
format: date-time
reason:
type: string
message:
type: string
66 changes: 60 additions & 6 deletions pkg/apis/acid.zalan.do/v1/crds.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ const (
OperatorConfigCRDResourceShort = "opconfig"
)

var (
Copy link
Member

@FxKu FxKu Aug 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are these not const?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed to const

specReplicasPath = ".spec.numberOfInstances"
statusReplicasPath = ".status.numberOfInstances"
labelSelectorPath = ".status.labelSelector"
)

// PostgresCRDResourceColumns definition of AdditionalPrinterColumns for postgresql CRD
var PostgresCRDResourceColumns = []apiextv1.CustomResourceColumnDefinition{
{
Expand Down Expand Up @@ -72,7 +78,7 @@ var PostgresCRDResourceColumns = []apiextv1.CustomResourceColumnDefinition{
Name: "Status",
Type: "string",
Description: "Current sync status of postgresql resource",
JSONPath: ".status.PostgresClusterStatus",
JSONPath: ".status.postgresClusterStatus",
},
}

Expand Down Expand Up @@ -1106,10 +1112,47 @@ var PostgresCRDResourceValidation = apiextv1.CustomResourceValidation{
},
"status": {
Type: "object",
AdditionalProperties: &apiextv1.JSONSchemaPropsOrBool{
Schema: &apiextv1.JSONSchemaProps{
Properties: map[string]apiextv1.JSONSchemaProps{
"postgresClusterStatus": {
Type: "string",
},
"numberOfInstances": {
Type: "integer",
Format: "int32",
},
"labelSelector": {
Type: "string",
},
"observedGeneration": {
Type: "integer",
Format: "int64",
},
"conditions": {
Type: "array",
Items: &apiextv1.JSONSchemaPropsOrArray{
Schema: &apiextv1.JSONSchemaProps{
Type: "object",
Properties: map[string]apiextv1.JSONSchemaProps{
"type": {
Type: "string",
},
"status": {
Type: "string",
},
"lastTransitionTime": {
Type: "string",
Format: "date-time",
},
"reason": {
Type: "string",
},
"message": {
Type: "string",
},
},
},
},
},
},
},
},
Expand Down Expand Up @@ -1983,7 +2026,7 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{
func buildCRD(name, kind, plural, list, short string,
categories []string,
columns []apiextv1.CustomResourceColumnDefinition,
validation apiextv1.CustomResourceValidation) *apiextv1.CustomResourceDefinition {
validation apiextv1.CustomResourceValidation, specReplicasPath string, statusReplicasPath string, labelSelectorPath string) *apiextv1.CustomResourceDefinition {
return &apiextv1.CustomResourceDefinition{
TypeMeta: metav1.TypeMeta{
APIVersion: fmt.Sprintf("%s/%s", apiextv1.GroupName, apiextv1.SchemeGroupVersion.Version),
Expand All @@ -2010,6 +2053,11 @@ func buildCRD(name, kind, plural, list, short string,
Storage: true,
Subresources: &apiextv1.CustomResourceSubresources{
Status: &apiextv1.CustomResourceSubresourceStatus{},
Scale: &apiextv1.CustomResourceSubresourceScale{
SpecReplicasPath: specReplicasPath,
StatusReplicasPath: statusReplicasPath,
LabelSelectorPath: &labelSelectorPath,
},
},
AdditionalPrinterColumns: columns,
Schema: &validation,
Expand All @@ -2028,7 +2076,10 @@ func PostgresCRD(crdCategories []string) *apiextv1.CustomResourceDefinition {
PostgresCRDResourceShort,
crdCategories,
PostgresCRDResourceColumns,
PostgresCRDResourceValidation)
PostgresCRDResourceValidation,
specReplicasPath,
statusReplicasPath,
labelSelectorPath)
}

// ConfigurationCRD returns CustomResourceDefinition built from OperatorConfigCRDResource
Expand All @@ -2040,5 +2091,8 @@ func ConfigurationCRD(crdCategories []string) *apiextv1.CustomResourceDefinition
OperatorConfigCRDResourceShort,
crdCategories,
OperatorConfigCRDResourceColumns,
OperatorConfigCRDResourceValidation)
OperatorConfigCRDResourceValidation,
specReplicasPath,
statusReplicasPath,
labelSelectorPath)
}
42 changes: 41 additions & 1 deletion pkg/apis/acid.zalan.do/v1/postgresql_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package v1
// Postgres CRD definition, please use CamelCase for field names.

import (
"k8s.io/apimachinery/pkg/api/equality"
"time"

v1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -225,9 +226,48 @@ type Sidecar struct {
// UserFlags defines flags (such as superuser, nologin) that could be assigned to individual users
type UserFlags []string

type Conditions []Condition

type ConditionType string
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
type ConditionType string
type PostgresqlConditionType string

type VolatileTime struct {
Inner metav1.Time `json:",inline"`
}

// MarshalJSON implements the json.Marshaler interface.
func (t VolatileTime) MarshalJSON() ([]byte, error) {
return t.Inner.MarshalJSON()
}

// UnmarshalJSON implements the json.Unmarshaller interface.
func (t *VolatileTime) UnmarshalJSON(b []byte) error {
return t.Inner.UnmarshalJSON(b)
}

func init() {
equality.Semantic.AddFunc(
// Always treat VolatileTime fields as equivalent.
func(VolatileTime, VolatileTime) bool {
return true
},
)
}

// Condition contains the conditions of the PostgreSQL cluster
type Condition struct {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be changed to PostgresqlCondition? This aligns with other K8s resources.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, changed to PostgresqlCondition

Type ConditionType `json:"type" description:"type of status condition"`
Status v1.ConditionStatus `json:"status" description:"status of the condition, one of True, False, Unknown"`
LastTransitionTime VolatileTime `json:"lastTransitionTime,omitempty" description:"last time the condition transit from one status to another"`
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why use an extra type and not just metav1.Time?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mostly to avoid semantic check inequality with metav1.time, with a wrapper around metav1.time there won't be a semantic inequality even if the times are different but everything else in the CR is same, this could be handy while writing unit tests or e2e tests I think

Reason string `json:"reason,omitempty" description:"one-word CamelCase reason for the condition's last transition"`
Message string `json:"message,omitempty" description:"human-readable message indicating details about last transition"`
}

// PostgresStatus contains status of the PostgreSQL cluster (running, creation failed etc.)
type PostgresStatus struct {
PostgresClusterStatus string `json:"PostgresClusterStatus"`
PostgresClusterStatus string `json:"postgresClusterStatus"`
NumberOfInstances int32 `json:"numberOfInstances"`
LabelSelector string `json:"labelSelector"`
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
Conditions Conditions `json:"conditions,omitempty"`
}

// ConnectionPooler Options for connection pooler
Expand Down
4 changes: 0 additions & 4 deletions pkg/apis/acid.zalan.do/v1/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,3 @@ func (postgresStatus PostgresStatus) Running() bool {
func (postgresStatus PostgresStatus) Creating() bool {
return postgresStatus.PostgresClusterStatus == ClusterStatusCreating
}

func (postgresStatus PostgresStatus) String() string {
return postgresStatus.PostgresClusterStatus
}
19 changes: 13 additions & 6 deletions pkg/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -255,16 +255,20 @@ func (c *Cluster) Create() (err error) {
ss *appsv1.StatefulSet
)

//Even though its possible to propogate other CR labels to the pods, picking the default label here since its propogated to all the pods by default. But this means that in order for the scale subresource to work properly, user must set the "cluster-name" key in their CRs with value matching the CR name.
labelstring := fmt.Sprintf("%s=%s", "cluster-name", c.Postgresql.ObjectMeta.Labels["cluster-name"]) //TODO: make this configurable.
Copy link
Member

@FxKu FxKu Aug 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Labels we propagate to all child resources are configurable with ClusterLabels and ClusterNameLabel. We should use the latter, I think and not hard code it here.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have changed it, using ClusterNameLabel


defer func() {
var (
pgUpdatedStatus *acidv1.Postgresql
errStatus error
)
existingConditions := c.Postgresql.Status.Conditions
if err == nil {
pgUpdatedStatus, errStatus = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusRunning) //TODO: are you sure it's running?
pgUpdatedStatus, errStatus = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusRunning, c.Postgresql.Spec.NumberOfInstances, labelstring, c.Postgresql.Generation, existingConditions, "") //TODO: are you sure it's running?
} else {
c.logger.Warningf("cluster created failed: %v", err)
pgUpdatedStatus, errStatus = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusAddFailed)
pgUpdatedStatus, errStatus = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusAddFailed, 0, labelstring, 0, existingConditions, err.Error())
}
if errStatus != nil {
c.logger.Warningf("could not set cluster status: %v", errStatus)
Expand All @@ -274,7 +278,8 @@ func (c *Cluster) Create() (err error) {
}
}()

pgCreateStatus, err = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusCreating)
existingConditions := c.Postgresql.Status.Conditions
pgCreateStatus, err = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusCreating, 0, labelstring, 0, existingConditions, "")
if err != nil {
return fmt.Errorf("could not set cluster status: %v", err)
}
Expand Down Expand Up @@ -927,7 +932,9 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
c.mu.Lock()
defer c.mu.Unlock()

c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusUpdating)
labelstring := fmt.Sprintf("%s=%s", "cluster-name", c.Postgresql.ObjectMeta.Labels["cluster-name"])
existingConditions := c.Postgresql.Status.Conditions
c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusUpdating, c.Postgresql.Status.NumberOfInstances, labelstring, c.Postgresql.Status.ObservedGeneration, existingConditions, "")
c.setSpec(newSpec)

defer func() {
Expand All @@ -936,9 +943,9 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
err error
)
if updateFailed {
pgUpdatedStatus, err = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusUpdateFailed)
pgUpdatedStatus, err = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusUpdateFailed, c.Postgresql.Status.NumberOfInstances, labelstring, c.Postgresql.Status.ObservedGeneration, existingConditions, err.Error())
} else {
pgUpdatedStatus, err = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusRunning)
pgUpdatedStatus, err = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusRunning, newSpec.Spec.NumberOfInstances, labelstring, c.Postgresql.Generation, existingConditions, "")
}
if err != nil {
c.logger.Warningf("could not set cluster status: %v", err)
Expand Down
6 changes: 4 additions & 2 deletions pkg/cluster/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,13 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error {
pgUpdatedStatus *acidv1.Postgresql
errStatus error
)
labelstring := fmt.Sprintf("%s=%s", "cluster-name", c.Postgresql.ObjectMeta.Labels["cluster-name"])
existingConditions := c.Postgresql.Status.Conditions
if err != nil {
c.logger.Warningf("error while syncing cluster state: %v", err)
pgUpdatedStatus, errStatus = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusSyncFailed)
pgUpdatedStatus, errStatus = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusSyncFailed, newSpec.Status.NumberOfInstances, labelstring, c.Postgresql.Status.ObservedGeneration, existingConditions, errStatus.Error())
} else if !c.Status.Running() {
pgUpdatedStatus, errStatus = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusRunning)
pgUpdatedStatus, errStatus = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusRunning, newSpec.Spec.NumberOfInstances, labelstring, c.Postgresql.Generation, existingConditions, "")
}
if errStatus != nil {
c.logger.Warningf("could not set cluster status: %v", errStatus)
Expand Down
12 changes: 8 additions & 4 deletions pkg/controller/postgresql.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,9 @@
func (c *Controller) addCluster(lg *logrus.Entry, clusterName spec.NamespacedName, pgSpec *acidv1.Postgresql) (*cluster.Cluster, error) {
if c.opConfig.EnableTeamIdClusternamePrefix {
if _, err := acidv1.ExtractClusterName(clusterName.Name, pgSpec.Spec.TeamID); err != nil {
c.KubeClient.SetPostgresCRDStatus(clusterName, acidv1.ClusterStatusInvalid)
labelstring := fmt.Sprintf("%s=%s", "cluster-name", pgSpec.ObjectMeta.Labels["cluster-name"])
existingConditions := pgSpec.Status.Conditions
c.KubeClient.SetPostgresCRDStatus(clusterName, acidv1.ClusterStatusInvalid, pgSpec.Status.NumberOfInstances, labelstring, pgSpec.Status.ObservedGeneration, existingConditions, err.Error())
return nil, err
}
}
Expand Down Expand Up @@ -207,10 +209,10 @@
if event.EventType == EventRepair {
runRepair, lastOperationStatus := cl.NeedsRepair()
if !runRepair {
lg.Debugf("observed cluster status %s, repair is not required", lastOperationStatus)

Check failure on line 212 in pkg/controller/postgresql.go

View workflow job for this annotation

GitHub Actions / End-2-End tests

(*github.com/sirupsen/logrus.Entry).Debugf format %s has arg lastOperationStatus of wrong type github.com/zalando/postgres-operator/pkg/apis/acid.zalan.do/v1.PostgresStatus

Check failure on line 212 in pkg/controller/postgresql.go

View workflow job for this annotation

GitHub Actions / Unit tests and coverage

(*github.com/sirupsen/logrus.Entry).Debugf format %s has arg lastOperationStatus of wrong type github.com/zalando/postgres-operator/pkg/apis/acid.zalan.do/v1.PostgresStatus
return
}
lg.Debugf("observed cluster status %s, running sync scan to repair the cluster", lastOperationStatus)

Check failure on line 215 in pkg/controller/postgresql.go

View workflow job for this annotation

GitHub Actions / End-2-End tests

(*github.com/sirupsen/logrus.Entry).Debugf format %s has arg lastOperationStatus of wrong type github.com/zalando/postgres-operator/pkg/apis/acid.zalan.do/v1.PostgresStatus

Check failure on line 215 in pkg/controller/postgresql.go

View workflow job for this annotation

GitHub Actions / Unit tests and coverage

(*github.com/sirupsen/logrus.Entry).Debugf format %s has arg lastOperationStatus of wrong type github.com/zalando/postgres-operator/pkg/apis/acid.zalan.do/v1.PostgresStatus
event.EventType = EventSync
}

Expand Down Expand Up @@ -472,16 +474,18 @@

if clusterError != "" && eventType != EventDelete {
c.logger.WithField("cluster-name", clusterName).Debugf("skipping %q event for the invalid cluster: %s", eventType, clusterError)
labelstring := fmt.Sprintf("%s=%s", "cluster-name", informerNewSpec.ObjectMeta.Labels["cluster-name"])
existingConditions := informerNewSpec.Status.Conditions

switch eventType {
case EventAdd:
c.KubeClient.SetPostgresCRDStatus(clusterName, acidv1.ClusterStatusAddFailed)
c.KubeClient.SetPostgresCRDStatus(clusterName, acidv1.ClusterStatusAddFailed, informerNewSpec.Status.NumberOfInstances, labelstring, informerNewSpec.Status.ObservedGeneration, existingConditions, clusterError)
c.eventRecorder.Eventf(c.GetReference(informerNewSpec), v1.EventTypeWarning, "Create", "%v", clusterError)
case EventUpdate:
c.KubeClient.SetPostgresCRDStatus(clusterName, acidv1.ClusterStatusUpdateFailed)
c.KubeClient.SetPostgresCRDStatus(clusterName, acidv1.ClusterStatusUpdateFailed, informerNewSpec.Status.NumberOfInstances, labelstring, informerNewSpec.Status.ObservedGeneration, existingConditions, clusterError)
c.eventRecorder.Eventf(c.GetReference(informerNewSpec), v1.EventTypeWarning, "Update", "%v", clusterError)
default:
c.KubeClient.SetPostgresCRDStatus(clusterName, acidv1.ClusterStatusSyncFailed)
c.KubeClient.SetPostgresCRDStatus(clusterName, acidv1.ClusterStatusSyncFailed, informerNewSpec.Status.NumberOfInstances, labelstring, informerNewSpec.Status.ObservedGeneration, existingConditions, clusterError)
c.eventRecorder.Eventf(c.GetReference(informerNewSpec), v1.EventTypeWarning, "Sync", "%v", clusterError)
}

Expand Down
Loading
Loading