From a3fca92c1da24031e55b9311996237d20e5e8d8d Mon Sep 17 00:00:00 2001 From: Nick Dimiduk Date: Sun, 7 Jan 2024 17:43:39 +0100 Subject: [PATCH 1/2] HBASE-27830 Introduce hdfs overlay Provide a basic implementation for running HDFS using the docker image provided in HBASE-27846. Also provides testing capabilities using the test image added in HBASE-27829. Makes some tweaks to that image to get it running in a linux environment -- Docker on MacOS make some user permission adjustments that are not present in linux. --- .gitignore | 4 + base/README.md | 39 +++ base/delete-format-hdfs-configmap-job.yaml | 89 +++++ base/environment-configmap.yaml | 70 ++++ base/java.security | 18 + base/jmxexporter.yaml | 32 ++ base/kustomization.yaml | 71 ++++ base/log4j.properties.hadoop | 55 +++ base/namespace.yaml | 23 ++ base/rbac.yaml | 103 ++++++ base/scripts/apiserver_access.sh | 26 ++ base/scripts/delete_configmap.sh | 34 ++ base/scripts/describe_node.sh | 33 ++ base/scripts/exists_configmap.sh | 36 ++ base/scripts/get_node_labels.sh | 20 ++ base/scripts/get_node_labels_from_pod_IP.sh | 32 ++ base/scripts/get_node_name_from_pod_IP.sh | 35 ++ base/scripts/get_statefulset.sh | 33 ++ base/scripts/get_statefulset_replica_count.sh | 20 ++ base/scripts/jmxping.sh | 96 ++++++ base/scripts/log.sh | 44 +++ base/scripts/topology.sh | 66 ++++ base/ssl-client.xml | 52 +++ base/ssl-server.xml | 56 +++ dev-support/jenkins/Jenkinsfile | 2 +- dockerfiles/kuttl/Dockerfile | 34 +- dockerfiles/kuttl/README.md | 19 +- dockerfiles/kuttl/docker-bake.hcl | 10 +- overlays/hdfs/core-site.xml | 96 ++++++ overlays/hdfs/dn-service.yaml | 29 ++ overlays/hdfs/dn-statefulset.yaml | 222 ++++++++++++ overlays/hdfs/hdfs-site.xml | 274 +++++++++++++++ overlays/hdfs/kustomization.yaml | 36 ++ overlays/hdfs/nn-service.yaml | 37 ++ overlays/hdfs/nn-statefulset.yaml | 326 ++++++++++++++++++ tests/README.md | 204 +++++++++++ tests/bin/kustomize_into_tmpdir.sh | 90 +++++ .../integration/overlays_hdfs/00-assert.yaml | 
31 ++ .../overlays_hdfs/00-kustomize.yaml | 20 ++ .../overlays_hdfs/kustomization.yaml | 22 ++ .../integration/test_base/kustomization.yaml | 29 ++ .../integration/test_base/networkpolicy.yaml | 29 ++ tests/kuttl-test-integration.yaml | 37 ++ tests/kuttl-test-unit.yaml | 28 ++ tests/unit/base/00-assert.yaml | 72 ++++ tests/unit/base/00-kustomize.yaml | 20 ++ tests/unit/base/README.md | 24 ++ tests/unit/base/kustomization.yaml | 20 ++ tests/unit/overlays_hdfs/00-assert.yaml | 73 ++++ tests/unit/overlays_hdfs/00-kustomize.yaml | 20 ++ tests/unit/overlays_hdfs/kustomization.yaml | 21 ++ 51 files changed, 2886 insertions(+), 26 deletions(-) create mode 100644 .gitignore create mode 100644 base/README.md create mode 100644 base/delete-format-hdfs-configmap-job.yaml create mode 100644 base/environment-configmap.yaml create mode 100644 base/java.security create mode 100644 base/jmxexporter.yaml create mode 100644 base/kustomization.yaml create mode 100644 base/log4j.properties.hadoop create mode 100644 base/namespace.yaml create mode 100644 base/rbac.yaml create mode 100755 base/scripts/apiserver_access.sh create mode 100755 base/scripts/delete_configmap.sh create mode 100644 base/scripts/describe_node.sh create mode 100755 base/scripts/exists_configmap.sh create mode 100644 base/scripts/get_node_labels.sh create mode 100644 base/scripts/get_node_labels_from_pod_IP.sh create mode 100644 base/scripts/get_node_name_from_pod_IP.sh create mode 100755 base/scripts/get_statefulset.sh create mode 100755 base/scripts/get_statefulset_replica_count.sh create mode 100755 base/scripts/jmxping.sh create mode 100644 base/scripts/log.sh create mode 100755 base/scripts/topology.sh create mode 100644 base/ssl-client.xml create mode 100644 base/ssl-server.xml create mode 100644 overlays/hdfs/core-site.xml create mode 100644 overlays/hdfs/dn-service.yaml create mode 100644 overlays/hdfs/dn-statefulset.yaml create mode 100644 overlays/hdfs/hdfs-site.xml create mode 100644 
overlays/hdfs/kustomization.yaml create mode 100644 overlays/hdfs/nn-service.yaml create mode 100644 overlays/hdfs/nn-statefulset.yaml create mode 100644 tests/README.md create mode 100755 tests/bin/kustomize_into_tmpdir.sh create mode 100644 tests/integration/overlays_hdfs/00-assert.yaml create mode 100644 tests/integration/overlays_hdfs/00-kustomize.yaml create mode 100644 tests/integration/overlays_hdfs/kustomization.yaml create mode 100644 tests/integration/test_base/kustomization.yaml create mode 100644 tests/integration/test_base/networkpolicy.yaml create mode 100644 tests/kuttl-test-integration.yaml create mode 100644 tests/kuttl-test-unit.yaml create mode 100644 tests/unit/base/00-assert.yaml create mode 100644 tests/unit/base/00-kustomize.yaml create mode 100644 tests/unit/base/README.md create mode 100644 tests/unit/base/kustomization.yaml create mode 100644 tests/unit/overlays_hdfs/00-assert.yaml create mode 100644 tests/unit/overlays_hdfs/00-kustomize.yaml create mode 100644 tests/unit/overlays_hdfs/kustomization.yaml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5142412 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ + +# kuttl detritus +TEST-kuttl-report-unit.xml +kubeconfig diff --git a/base/README.md b/base/README.md new file mode 100644 index 0000000..d472796 --- /dev/null +++ b/base/README.md @@ -0,0 +1,39 @@ + + +# Base + +This directory serves as the common foundation upon which subsequent `overlays` build. This +directory is mostly an artifact of developmental versions of this structure and much of it can +probably be refactored out into other overlays. + +Some values such as SERVICE name, SERVICEACCOUNT name, and RBAC role are hard-coded in the +`environment-configmap.yaml` and supplied into the pods as environment variables. Other +hardcodings include the service name ('hadoop') and the namespace we run in (also 'hadoop'). 
+ +The Hadoop Configuration system can interpolate environment variables into '\*.xml' file values +ONLY. See [Configuration +Javadoc](http://hadoop.apache.org/docs/current/api/org/apache/hadoop/conf/Configuration.html) + +...but we cannot interpolate the SERVICE name into '\*.xml' file key names as is needed when +doing HA in hdfs-site.xml... so for now, we have hard-codings in 'hdfs-site.xml' key names. For +example, the property key name `dfs.ha.namenodes.hadoop` has the SERVICE name ('hadoop') in it or +the key `dfs.namenode.http-address.hadoop` (TODO: Fix/Workaround). + +Edits to pod resources or JVM args for a process are done in place in the yaml files or in +kustomization replacements in overlays. diff --git a/base/delete-format-hdfs-configmap-job.yaml b/base/delete-format-hdfs-configmap-job.yaml new file mode 100644 index 0000000..cc52f4d --- /dev/null +++ b/base/delete-format-hdfs-configmap-job.yaml @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Job to delete the 'format-hdfs' configmap after hdfs has come up +# successfully.
The 'format-hdfs' configmap is added by running +# 'kubectl -n hadoop apply -k tools/format-hdfs' (You need the +# '-n hadoop' to apply the configmap to the 'hadoop' namespace). +# Add the configmap if you want hdfs to format the filesystem. +# Do this on initial install only or if you want to clean out +# the current HDFS data. +# +# If the 'format-hdfs' configmap is NOT present, this Job exits/completes. +# Otherwise, it keeps probing until HDFS is up and healthy, and then +# this job removes the 'format-hdfs' configmap. The presence of the +# 'format-hdfs' configmap is checked by all hdfs pods on startup. If +# the configmap is present, they clean out their data directories and run +# format/recreate of their data directories. To install the 'format-hdfs' +# configmap, do it before launch of hdfs. See tools/format-hdfs. +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: delete-format-hdfs-configmap +spec: + ttlSecondsAfterFinished: 300 + template: + spec: + containers: + - image: hadoop + name: delete-format-hdfs-configmap + imagePullPolicy: IfNotPresent + command: + - /bin/bash + - -c + - |- + set -xe + # See if 'format-hdfs' configmap is present. + # If not, then there is nothing for this job to do, complete, exit 0. + /tmp/scripts/exists_configmap.sh format-hdfs || { + echo "No 'format-hdfs' configmap found so no work to do; exiting" + exit 0 + } + # The `format-hdfs`` configmap is present. Remove it after HDFS is fully up. + /tmp/scripts/jmxping.sh namenode ${HADOOP_SERVICE} + /tmp/scripts/jmxping.sh datanode ${HADOOP_SERVICE} + # TODO: Should we check if ha and if so, if a NN active... get a report on health? + # HDFS is up. Delete the format-hdfs flag. 
+ /tmp/scripts/delete_configmap.sh format-hdfs + resources: + requests: + cpu: '0.2' + memory: 256Mi + limits: + cpu: '0.5' + memory: 512Mi + envFrom: + - configMapRef: + name: environment + volumeMounts: + - mountPath: /tmp/scripts + name: scripts + # Scratch dir is a location where init containers place items for later use + # by the main containers when they run. + - mountPath: /tmp/scratch + name: scratch + serviceAccountName: hadoop + restartPolicy: Never + volumes: + - configMap: + name: scripts + defaultMode: 0555 + name: scripts + # Scratch dir is a location where init containers place items for later use + # by the main containers when they run. + - emptyDir: {} + name: scratch diff --git a/base/environment-configmap.yaml b/base/environment-configmap.yaml new file mode 100644 index 0000000..d018c22 --- /dev/null +++ b/base/environment-configmap.yaml @@ -0,0 +1,70 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Common environment variables shared across pods. +# Include w/ the 'envFrom:' directive. +# We have to be pedantic in here. We cannot have a value +# refer to a define made earlier; the interpolation +# doesn't work.
+--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: environment +data: + DOMAIN: svc.cluster.local + # HADOOP_HOME, HADOOP_HDFS_HOME, etc., and HBASE_HOME are provided by the images. + # + # The headless service that the pods in our statefulsets come up in. + # See https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id + # The headless-service is defined in the adjacent rbac.yaml. + # Matches the serviceName we have on our statefulsets. + # Required that we create it according to https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#limitations + HADOOP_SERVICE: hadoop + # dfs.http.policy + # If HTTPS_ONLY or HTTP_AND_HTTPS then we'll depend on https in UI and jmx'ing + # and will adjust scheme and ports accordingly. If https, we need to get certificates + # so cert-manager, etc., needs to be installed. + HTTP_POLICY: HTTP_ONLY + DFS_HTTPS_ENABLE: "false" + HBASE_SSL_ENABLED: "false" + HTTP_AUTH: kerberos + # The insecure port for now. + DATANODE_DATA_DIR: /data00/dn + JOURNALNODE_DATA_DIR: /data00/jn + NAMENODE_DATA_DIR: /data00/nn + HDFS_AUDIT_LOGGER: INFO,RFAAUDIT + HADOOP_DAEMON_ROOT_LOGGER: INFO,RFA,CONSOLE + HADOOP_ROOT_LOGGER: INFO,RFA,CONSOLE + HADOOP_SECURITY_LOGGER: INFO,RFAS + HADOOP_CONF_DIR: /etc/hadoop + HADOOP_LOG_DIR: /var/log/hadoop + HADOOP_SECURE_LOG: /var/log/hadoop + HBASE_ROOT_LOGGER: DEBUG,RFA,console + HBASE_LOG_DIR: /var/log/hbase + HBASE_CONF_DIR: /etc/hbase + # if [ "$HBASE_NO_REDIRECT_LOG" != "" ]; then ... so we are asking for NO redirect of logs. + HBASE_NO_REDIRECT_LOG: "true" + HBASE_MANAGES_ZK: "false" + DFS_REPLICATION: "1" + # What percentage of the container memory to give over to the JVM. + # Be aware that we look at the container resource limit, NOT request: e.g. if + # the resource request memory is set to 8G and the limit is 16G and the + # JVM_HEAP_PERCENTAGE_OF_RESOURCE_LIMIT is 50 as in 50%, + # the heap will be set to 8G: i.e. 1/2 of the 16G limit.
+ # ip-172-18-132-227.us-west-2.compute.internal + # See https://dzone.com/articles/best-practices-java-memory-arguments-for-container + JVM_HEAP_PERCENTAGE_OF_RESOURCE_LIMIT: "45" diff --git a/base/java.security b/base/java.security new file mode 100644 index 0000000..c5c4f04 --- /dev/null +++ b/base/java.security @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +networkaddress.cache.ttl=1 +networkaddress.cache.negative.ttl=0 diff --git a/base/jmxexporter.yaml b/base/jmxexporter.yaml new file mode 100644 index 0000000..4dd20fa --- /dev/null +++ b/base/jmxexporter.yaml @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# We run the jmxexporter on most all processes to convert jmx metrics to prometheus. +# This is the config file it uses. +# +# Don't lowercase. Leave the metrics in camelcase. Do this because while +# jmxexport can lowercase metrics names, telegraf can't. +# +#lowercaseOutputName: false +#lowercaseOutputLabelNames: false +# From https://godatadriven.com/blog/monitoring-hbase-with-prometheus/ +#rules: +# - pattern: HadoopNamespace_([^\W_]+)_table_([^\W_]+)_region_([^\W_]+)_metric_(\w+) +# name: HBase_metric_$4 +# labels: +# namespace: "$1" +# table: "$2" +# region: "$3" diff --git a/base/kustomization.yaml b/base/kustomization.yaml new file mode 100644 index 0000000..43dd57c --- /dev/null +++ b/base/kustomization.yaml @@ -0,0 +1,71 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +configMapGenerator: +- name: hadoop-configuration + # Base set of hadoop configurations. Overlays will add to the set here. + files: + - log4j.properties=log4j.properties.hadoop +- name: scripts + # Useful scripts + files: + - scripts/jmxping.sh + - scripts/apiserver_access.sh + - scripts/get_statefulset_replica_count.sh + - scripts/get_statefulset.sh + - scripts/exists_configmap.sh + - scripts/delete_configmap.sh + - scripts/topology.sh + - scripts/describe_node.sh + - scripts/get_node_name_from_pod_IP.sh + - scripts/get_node_labels.sh + - scripts/get_node_labels_from_pod_IP.sh + - scripts/log.sh + options: + disableNameSuffixHash: true +- name: global-files + # Add files used by most/all processes into a global configuration configmap + # accessible to all processes. The environment-configmap defines environment variables used by + # all processes and pods. This configmap loads files used by each process. + files: + - jmxexporter.yaml + - java.security + - ssl-client.xml + - ssl-server.xml + options: + disableNameSuffixHash: true + +secretGenerator: +- name: keystore-password + type: Opaque + options: + disableNameSuffixHash: true + literals: + - password=changeit + +resources: +- namespace.yaml +# Global environment variables read in by pods +- environment-configmap.yaml +- rbac.yaml +- delete-format-hdfs-configmap-job.yaml +# These depend on cert-manager being installed. +# See https://cert-manager.io/docs/installation/ +#- clusterissuer.yaml +#- certificate.yaml diff --git a/base/log4j.properties.hadoop b/base/log4j.properties.hadoop new file mode 100644 index 0000000..df7cf7b --- /dev/null +++ b/base/log4j.properties.hadoop @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +hadoop.console.threshold=LOG +hadoop.log.maxbackupindex=20 +hadoop.log.maxfilesize=256MB +hadoop.root.logger=TRACE,CONSOLE +hadoop.security.log.file=SecurityAuth-${user.name}.audit +hadoop.security.log.maxbackupindex=20 +hadoop.security.log.maxfilesize=256MB +hadoop.security.logger=INFO,RFAS +hdfs.audit.log.maxbackupindex=20 +hdfs.audit.log.maxfilesize=256MB +hdfs.audit.logger=INFO,RFAAUDIT +log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false +log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout +log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n +log4j.appender.CONSOLE.Threshold=${hadoop.console.threshold} +log4j.appender.RFA=org.apache.log4j.RollingFileAppender +log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file} +log4j.appender.RFA.layout=org.apache.log4j.PatternLayout +log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n +log4j.appender.RFA.MaxBackupIndex=${hadoop.log.maxbackupindex} +log4j.appender.RFA.MaxFileSize=${hadoop.log.maxfilesize} +log4j.appender.RFAAUDIT=org.apache.log4j.RollingFileAppender +log4j.appender.RFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log +log4j.appender.RFAAUDIT.layout=org.apache.log4j.PatternLayout +log4j.appender.RFAAUDIT.layout.ConversionPattern=%d{ISO8601} 
[myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n +log4j.appender.RFAAUDIT.MaxBackupIndex=${hdfs.audit.log.maxbackupindex} +log4j.appender.RFAAUDIT.MaxFileSize=${hdfs.audit.log.maxfilesize} +log4j.appender.RFAS=org.apache.log4j.RollingFileAppender +log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} +log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout +log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n +log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex} +log4j.appender.RFAS.MaxFileSize=${hadoop.security.log.maxfilesize} +log4j.category.SecurityLogger=${hadoop.security.logger} +log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger} +log4j.logger.org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy=DEBUG +log4j.logger.org.apache.hadoop.net.NetworkTopology=DEBUG +log4j.rootLogger=${hadoop.root.logger} diff --git a/base/namespace.yaml b/base/namespace.yaml new file mode 100644 index 0000000..f3e73a6 --- /dev/null +++ b/base/namespace.yaml @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Allow the namespace of the user's Kustomization to be the destination of the deployment. 
+# How to manage the namespace with Kustomize -- https://stackoverflow.com/a/71150557 +--- +apiVersion: v1 +kind: Namespace +metadata: + name: ~~illegal_value_to_be_overridden_in_Kustomization~~ diff --git a/base/rbac.yaml b/base/rbac.yaml new file mode 100644 index 0000000..29e9c89 --- /dev/null +++ b/base/rbac.yaml @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Service and ServiceAccount names are hard-coded as 'hadoop'. +# RBAC Role name is also hard-coded as 'hadoop-role'. Service selects on +# an app named 'hadoop', another hard-coding. 
+--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: hadoop-role +rules: +- resources: + - configmaps + verbs: + - get + - delete + - list + apiGroups: + - '' +- resources: + - namespaces + verbs: + - get + - list + apiGroups: + - '' +- resources: + - statefulsets + verbs: + - get + - list + - patch + - update + apiGroups: + - 'apps' + - 'api' +- resources: + - pods + verbs: + - get + - list + - delete + - watch + apiGroups: + - '' +- resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete + apiGroups: + - coordination.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: hadoop-role-binding +subjects: +- kind: ServiceAccount + name: hadoop +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: hadoop-role +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: hadoop +--- +# Headless-service to cluster all our pods under. +# Named the same as the ServiceAccount above; statefulsets reference +# this Service in their serviceName. +# See https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id +# This is required for statefulsets. See https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#limitations +apiVersion: v1 +kind: Service +metadata: + name: hadoop +spec: + clusterIP: None + publishNotReadyAddresses: true + selector: + app: hadoop diff --git a/base/scripts/apiserver_access.sh b/base/scripts/apiserver_access.sh new file mode 100755 index 0000000..4a2929f --- /dev/null +++ b/base/scripts/apiserver_access.sh @@ -0,0 +1,26 @@ +#! /usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Defines used accessing the apiserver. +NAMESPACE=$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace) +export NAMESPACE +APISERVER=https://$KUBERNETES_SERVICE_HOST:$KUBERNETES_SERVICE_PORT +export APISERVER +CACERT=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt +export CACERT +TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token) +export TOKEN diff --git a/base/scripts/delete_configmap.sh b/base/scripts/delete_configmap.sh new file mode 100755 index 0000000..58a3107 --- /dev/null +++ b/base/scripts/delete_configmap.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Delete the named configmap +set -x +configmap_name="${1}" +outfile="$(mktemp "/tmp/$(basename "$0").XXXX")" +trap '{ rm -f -- "$outfile"; }' EXIT +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +# shellcheck source=/dev/null +source "${script_dir}/apiserver_access.sh" +# Following model described here: https://chengdol.github.io/2019/11/06/k8s-api/ +# http_code is the return status code +# From https://docs.okd.io/3.7/rest_api/api/v1.ConfigMap.html#Delete-api-v1-namespaces-namespace-configmaps-name +http_code=$(curl -w "%{http_code}" -sS -X DELETE --cacert "$CACERT" -H "Content-Type: application/json" -H "Accept: application/json, */*" -H "Authorization: Bearer $TOKEN" "$APISERVER/api/v1/namespaces/$NAMESPACE/configmaps/$configmap_name" -o "$outfile") +if [[ $http_code -ne 200 ]]; then + echo "{\"Result\": \"Failure\", \"httpReturnCode\":$http_code}" | jq '.' + exit 1 +fi +cat "$outfile" diff --git a/base/scripts/describe_node.sh b/base/scripts/describe_node.sh new file mode 100644 index 0000000..df1ffa1 --- /dev/null +++ b/base/scripts/describe_node.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# +# Get the description of the named node +set -x +node="${1}" +outfile="$(mktemp "/tmp/$(basename "$0")".XXXX)" +trap '{ rm -f -- "$outfile"; }' EXIT +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +# shellcheck source=/dev/null +source "${script_dir}/apiserver_access.sh" +# Following model described here: https://chengdol.github.io/2019/11/06/k8s-api/ +# http_code is the return status code +http_code=$(curl -w "%{http_code}" -sS --cacert "$CACERT" -H "Content-Type: application/json" -H "Accept: application/json, */*" -H "Authorization: Bearer $TOKEN" "$APISERVER/api/v1/nodes/$node" -o "$outfile") +if [[ $http_code -ne 200 ]]; then + echo "{\"Result\": \"Failure\", \"httpReturnCode\":$http_code}" | jq '.' + exit 1 +fi +cat "$outfile" diff --git a/base/scripts/exists_configmap.sh b/base/scripts/exists_configmap.sh new file mode 100755 index 0000000..7ed9a8f --- /dev/null +++ b/base/scripts/exists_configmap.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Check passed in configmap exists. +# Also checks if configmap with the POD_NAME exists too. +# Returns zero if found. 
+set -x +configmap_name="${1}" +outfile="$(mktemp "/tmp/$(basename "$0").XXXX")" +trap 'rm -f -- "$outfile"' EXIT +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +# shellcheck source=/dev/null +source "${script_dir}/apiserver_access.sh" +# Following model described here: https://chengdol.github.io/2019/11/06/k8s-api/ +# http_code is the return status code +# From https://docs.okd.io/3.7/rest_api/api/v1.ConfigMap.html#Delete-api-v1-namespaces-namespace-configmaps-name +http_code=$(curl -w "%{http_code}" -sS --cacert "$CACERT" -H "Content-Type: application/json" -H "Accept: application/json, */*" -H "Authorization: Bearer $TOKEN" "$APISERVER/api/v1/namespaces/$NAMESPACE/configmaps/$configmap_name" -o "$outfile") +[[ $http_code -eq 200 ]] || ( + # The configmap does not exist. Look for a configmap with this POD_NAME as a suffix too. + http_code=$(curl -w "%{http_code}" -sS --cacert "$CACERT" -H "Content-Type: application/json" -H "Accept: application/json, */*" -H "Authorization: Bearer $TOKEN" "$APISERVER/api/v1/namespaces/$NAMESPACE/configmaps/$configmap_name.${POD_NAME}" -o "$outfile") + [[ $http_code -eq 200 ]] +) diff --git a/base/scripts/get_node_labels.sh b/base/scripts/get_node_labels.sh new file mode 100644 index 0000000..bb50a65 --- /dev/null +++ b/base/scripts/get_node_labels.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Print the .metadata.labels JSON object of the node named by the first argument (via describe_node.sh)
+here="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+"${here}/describe_node.sh" "$1" | jq --raw-output '.metadata.labels'
diff --git a/base/scripts/get_node_labels_from_pod_IP.sh b/base/scripts/get_node_labels_from_pod_IP.sh
new file mode 100644
index 0000000..7458de3
--- /dev/null
+++ b/base/scripts/get_node_labels_from_pod_IP.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Get the labels json object of the node upon which the pod with the provided pod IP is running; prints "null" when that node cannot be determined
+script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+# shellcheck source=/dev/null
+source "${script_dir}/log.sh" "$TOPOLOGY_LOG" # source log function; the $TOPOLOGY_LOG variable is set in topology.sh
+nodeName=$("${script_dir}/get_node_name_from_pod_IP.sh" "${1}") || nodeName="null" # requesting node name based on pod IP; on failure the helper prints an error document and exits non-zero, so treat that as "not found" rather than as a node name
+if [[ -z "$nodeName" || "$nodeName" == "null" ]] # if no node is found when querying with this pod IP (or the lookup produced nothing)
+then
+  log -w "Unhandled case: Kubernetes instance not found for this pod IP"
+  echo "null" # null will get passed back to the topology caller; then when looking for the pertinent labels topology.sh will label this DN with the default rack
+else
+  log "nodeName found in pod description: $nodeName"
+  nodeLabels="$("${script_dir}/get_node_labels.sh" "$nodeName")" # getting the labels of the Kube node the pod is running on
+  log "node metadata labels: $nodeLabels"
+  echo "$nodeLabels"
+fi
diff --git a/base/scripts/get_node_name_from_pod_IP.sh b/base/scripts/get_node_name_from_pod_IP.sh
new file mode 100644
index 0000000..a0cb279
--- /dev/null
+++ b/base/scripts/get_node_name_from_pod_IP.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Get the name of the Kubernetes node with the provided hadoop pod IP
+set -x
+podIP="${1}" # this will be the IP of a datanode
+outfile="$(mktemp "/tmp/$(basename "$0").XXXX")"
+trap '{ rm -f -- "$outfile"; }' EXIT
+script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+# shellcheck source=/dev/null
+source "${script_dir}/apiserver_access.sh"
+# Following model described here: https://chengdol.github.io/2019/11/06/k8s-api/
+# http_code is the return status code. Pods are searched in $NAMESPACE, like the sibling API scripts (presumably set by apiserver_access.sh - confirm); "hadoop" remains the fallback when it is unset.
+http_code=$(curl -w "%{http_code}" -sS --cacert "$CACERT" -H "Content-Type: application/json" -H "Accept: application/json, */*" -H "Authorization: Bearer $TOKEN" "$APISERVER/api/v1/namespaces/${NAMESPACE:-hadoop}/pods?fieldSelector=status.podIP%3D$podIP" -o "$outfile")
+if [[ $http_code -ne 200 ]]; then
+  echo "{\"Result\": \"Failure\", \"httpReturnCode\":$http_code}" | jq '.'
+  exit 1
+fi
+
+# using jq, only return the name of the node containing this pod; jq will return null if no node is found (the filter is quoted so the shell cannot glob-expand its [0])
+jq -r '.items[0].spec.nodeName' "$outfile"
diff --git a/base/scripts/get_statefulset.sh b/base/scripts/get_statefulset.sh
new file mode 100755
index 0000000..b6a9162
--- /dev/null
+++ b/base/scripts/get_statefulset.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Print the JSON description of the statefulset named by the first argument
+set -x
+sts_name="${1}"
+response_file="$(mktemp "/tmp/$(basename "$0").XXXX")"
+trap 'rm -f -- "$response_file"' EXIT
+here="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+# shellcheck source=/dev/null
+source "${here}/apiserver_access.sh"
+# Talks to the API server with curl, after https://chengdol.github.io/2019/11/06/k8s-api/
+# curl writes the response body to the scratch file and prints only the HTTP status code
+status=$(curl -w "%{http_code}" -sS --cacert "$CACERT" -H "Content-Type: application/json" -H "Accept: application/json, */*" -H "Authorization: Bearer $TOKEN" "$APISERVER/apis/apps/v1/namespaces/$NAMESPACE/statefulsets/$sts_name" -o "$response_file")
+[[ $status -ne 200 ]] && {
+  echo "{\"Result\": \"Failure\", \"httpReturnCode\":$status}" | jq '.'
+  exit 1
+}
+cat "$response_file"
diff --git a/base/scripts/get_statefulset_replica_count.sh b/base/scripts/get_statefulset_replica_count.sh
new file mode 100755
index 0000000..ad1149a
--- /dev/null
+++ b/base/scripts/get_statefulset_replica_count.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Print .spec.replicas of the statefulset named by the first argument (via get_statefulset.sh)
+here="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+"${here}/get_statefulset.sh" "$1" | jq '.spec.replicas'
diff --git a/base/scripts/jmxping.sh b/base/scripts/jmxping.sh
new file mode 100755
index 0000000..2cf7aab
--- /dev/null
+++ b/base/scripts/jmxping.sh
@@ -0,0 +1,96 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Usage: jmxping.sh <role> <service> [<count>]
+# JMX ping that there are at least '<count>' instances of '<role>'
+# running in the sub-domain specified by <service>
+# (See https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id).
+# If no '<count>' supplied, we read the replica count from passed
+# in '<role>' statefulset from apiserver.
+set -x
+role="${1}"
+service="${2}"
+count_param="${3}"
+# URL scheme: https whenever HTTP_POLICY names one of the HTTPS_* policies,
+# plain http otherwise (including when HTTP_POLICY is unset).
+case "${HTTP_POLICY}" in
+  HTTPS_*) schema=https ;;
+  *) schema=http ;;
+esac
+# Port serving /jmx per role and scheme. datanode, namenode and journalnode have a
+# dedicated https port; master and regionserver use the same port for either scheme.
+case "${role}:${schema}" in
+  datanode:http)
+    jmxport=9864
+    ;;
+  datanode:https)
+    jmxport=9865
+    ;;
+  namenode:http)
+    jmxport=9870
+    ;;
+  namenode:https)
+    jmxport=9871
+    ;;
+  journalnode:http)
+    jmxport=8480
+    ;;
+  journalnode:https)
+    jmxport=8481
+    ;;
+  master:http | master:https)
+    jmxport=16010
+    ;;
+  regionserver:http | regionserver:https)
+    jmxport=16030
+    ;;
+  *)
+    # Unsupported role: give up without pinging anything.
+    exit 1
+    ;;
+esac
+
+# One polling round per pass; each round takes $interval seconds off the $timeout budget.
+interval=5
+timeout=$((60 * 60))
+until ((timeout <= 0))
+do
+  # The statefulset we depend on may not have deployed yet, so the first
+  # attempts at getting replicas may fail; such a round is simply retried.
+  replicas="$(/tmp/scripts/get_statefulset_replica_count.sh "$role")"
+  # With no count argument ping every replica, else the smaller of the two numbers.
+  if [[ -z "${count_param}" ]]; then
+    count=${replicas}
+  else
+    count=$((replicas < count_param ? replicas : count_param))
+  fi
+  total=0
+  for ((i = 0; i < count; i++)); do
+    # e.g. http://journalnode-1:8480/jmx?qry=java.lang:type=OperatingSystem
+    url="${schema}://${role}-${i}.${service}:${jmxport}/jmx?qry=java.lang:type=OperatingSystem"
+    # Number of reply lines mentioning SystemLoadAverage: expected 1 on success, 0 otherwise.
+    result=$(curl --cacert /tmp/scratch/ca.crt -v "$url" | grep -c SystemLoadAverage)
+    total=$((total + result))
+    # Succeed as soon as every expected instance has answered.
+    ((total != count)) || exit 0
+  done
+  timeout=$((timeout - interval))
+  echo "Failed; sleeping $interval, then retrying for $timeout more seconds"
+  sleep $interval
+done
+echo "Timedout!"
+exit 1
diff --git a/base/scripts/log.sh b/base/scripts/log.sh
new file mode 100644
index 0000000..f1b9b6a
--- /dev/null
+++ b/base/scripts/log.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# when sourcing log, first argument should be the file within $HADOOP_LOG_DIR that will be written to
+
+filename=${1}
+LOG_FILEPATH="$HADOOP_LOG_DIR/$filename"
+
+# log [-e] [-w] MESSAGE: appends "<date> <prefix> MESSAGE" to $LOG_FILEPATH (the file named when sourcing log.sh).
+# Use -e for error logging, -w for warning logs; one flag per argument, the last one given wins.
+# Flags are scanned by hand: getopts keeps its cursor in the global OPTIND, which is not reset between calls, so only the first flagged call in a shell would be parsed.
+log(){
+  prefix="" # No prefix with default INFO-level logging
+  while [[ $# -gt 0 ]]; do
+    case $1 in
+      -e) # change prefix to ERROR: in logs
+        prefix="ERROR:"
+        ;;
+      -w) # change prefix to WARNING: in logs
+        prefix="WARNING:"
+        ;;
+      *) # not a flag: the message starts here
+        break
+        ;;
+    esac
+    shift # drop the flag just handled
+  done
+  message=${1}
+  echo "$(date +"%F %T") $prefix $message" >> "$LOG_FILEPATH"
+}
diff --git a/base/scripts/topology.sh b/base/scripts/topology.sh
new file mode 100755
index 0000000..d405136
--- /dev/null
+++ b/base/scripts/topology.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Using topology script notion for HDFS rack awareness: https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/RackAwareness.html + +# This script takes in one or more datanode IPs as args and passes out rack name(s) for the pod(s) based on the EKS instance(s) they're running in. +# It will look for information about the EKS instance's partition placement group: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/placement-groups.html#placement-groups-partition +# As well as information about the EKS instance's availability zone according to AWS: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-availability-zones + +# if partition placement group information is found (in the form of the $partition_group_label variable defined below), +# then the rack passed out will be "/partition-group-<partition number>". + +# Otherwise, the script will take in availability zone information and pass out a +# rack label like "/availability-zone-<zone name>". + +# Supposition here is that when datanodes crash, the namenodes will provide the same rack when the pod comes back up. +# This is the behavior that's been observed when terminating datanodes manually and watching topology logs as they re-initialize.
+
+script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+
+TOPOLOGY_LOG="topology.log" # filepath within $HADOOP_LOG_DIR wherein topology logs will be placed
+export TOPOLOGY_LOG
+
+# shellcheck source=/dev/null
+source "${script_dir}/log.sh" "$TOPOLOGY_LOG"
+partition_group_label="partition_number" # this is an assumption made based on the Siri cluster at the moment; modify this variable if the Kube node label signifying placement groups is named differently
+
+log "argument(s) input to script: $*"
+for dn_IP in "$@"
+do
+  log "datanode IP: $dn_IP"
+  labels_json="$("${script_dir}/get_node_labels_from_pod_IP.sh" "$dn_IP")"
+  placement="$(echo "$labels_json" | jq -r ".$partition_group_label")"
+  # Preference order: partition group label, then availability zone label, then the default rack.
+  if [[ "$placement" != "null" ]]
+  then
+    rack="/partition-group-$placement"
+    note="output rack $rack for $dn_IP"
+  else
+    zone="$(echo "$labels_json" | jq -r '."topology.kubernetes.io/zone"')"
+    if [[ "$zone" != "null" ]]
+    then
+      rack="/availability-zone-$zone"
+      note="output rack $rack for $dn_IP"
+    else
+      rack="/default-rack" # when no partition group or availability zone info is found for the datanode
+      note="No partition groups or availability zones found; output default rack $rack for $dn_IP"
+    fi
+  fi
+  log "$note"
+  echo "$rack"
+done
diff --git a/base/ssl-client.xml b/base/ssl-client.xml
new file mode 100644
index 0000000..3a8ffff
--- /dev/null
+++ b/base/ssl-client.xml
@@ -0,0 +1,52 @@
+ + + + + ssl.client.keystore.keypassword + changeit + + + ssl.client.keystore.location + /tmp/scratch/keystore.jks + + + ssl.client.keystore.password + changeit + + + ssl.client.keystore.type + jks + + + ssl.client.truststore.location + /tmp/scratch/keystore.jks + + + ssl.client.truststore.password + changeit + + + ssl.client.truststore.reload.interval + 10000 + + + ssl.client.truststore.type + jks + +
diff --git a/base/ssl-server.xml b/base/ssl-server.xml
new file mode 100644
index 0000000..25e26dd
--- /dev/null
+++ b/base/ssl-server.xml @@ -0,0 +1,56 @@ + + + + + ssl.server.keystore.keypassword + + + + ssl.server.keystore.password + changeit + + + ssl.server.keystore.location + /tmp/scratch/keystore.jks + + + ssl.server.keystore.type + jks + + + ssl.server.truststore.location + /tmp/scratch/truststore.jks + + + ssl.server.truststore.keypassword + + + + ssl.server.truststore.password + changeit + + + ssl.server.truststore.reload.interval + 10000 + + + ssl.server.truststore.type + jks + + diff --git a/dev-support/jenkins/Jenkinsfile b/dev-support/jenkins/Jenkinsfile index 67cac71..84781b8 100644 --- a/dev-support/jenkins/Jenkinsfile +++ b/dev-support/jenkins/Jenkinsfile @@ -65,7 +65,7 @@ pipeline { } dir("${SOURCEDIR}") { sh label: 'build_docker_image.sh kuttl', script: '''#!/bin/bash -e - ./dev-support/jenkins/build_docker_image.sh kuttl --load + NON_ROOT_USER_ID=$(id -u) ./dev-support/jenkins/build_docker_image.sh kuttl --load docker image ls ''' } diff --git a/dockerfiles/kuttl/Dockerfile b/dockerfiles/kuttl/Dockerfile index 1b77f01..d879dc2 100644 --- a/dockerfiles/kuttl/Dockerfile +++ b/dockerfiles/kuttl/Dockerfile @@ -150,6 +150,9 @@ FROM hadolint_$TARGETARCH AS hadolint FROM ${IMG_BASE}:${IMG_BASE_TAG} as final ARG IMG_BASE ARG IMG_BASE_TAG +ARG ENVTEST_K8S_VERSION +ARG NON_ROOT_USER +ARG NON_ROOT_USER_ID COPY --from=hadolint /bin/hadolint /bin/hadolint COPY --from=kubectl /tmp/kubectl /usr/local/bin/ @@ -158,34 +161,39 @@ COPY --from=kuttl /tmp/kubectl-kuttl /usr/local/bin/ COPY --from=yq /usr/bin/yq /usr/bin/yq COPY --chmod=0755 dockerfiles/kuttl/entrypoint.sh /bin/ -# nonroot user as defined in https://github.com/GoogleContainerTools/distroless -ENV NON_ROOT_USER=nonroot -ENV NON_ROOT_USER_ID=65532 -ENV NON_ROOT_USER_HOME=/home/nonroot +ENV NON_ROOT_USER="${NON_ROOT_USER}" +ENV NON_ROOT_USER_ID="${NON_ROOT_USER_ID}" +ENV NON_ROOT_USER_HOME="/home/${NON_ROOT_USER}" ENV DEBIAN_FRONTEND=noninteractive ENV DEBCONF_TERSE=true +# make `go install` executables available 
to all image users +ENV GOPATH="/usr/local/share/go" +ENV GOBIN="${GOPATH}/bin" + # hadolint ignore=DL3018 -RUN apt-get -q update \ +RUN [ ! -z "${NON_ROOT_USER}" ] \ + && [ ! -z "${NON_ROOT_USER_ID}" ] \ + && apt-get -q update \ && apt-get -q -y upgrade \ && apt-get -q clean \ && rm -rf /var/lib/apt/lists/* \ && adduser --disabled-password --uid "${NON_ROOT_USER_ID}" "${NON_ROOT_USER}" \ - && adduser "${NON_ROOT_USER}" docker + && adduser "${NON_ROOT_USER}" docker \ + && go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest \ + && ln -s "${GOBIN}/setup-envtest" /usr/local/bin/setup-envtest WORKDIR "${NON_ROOT_USER_HOME}" USER "${NON_ROOT_USER}" -# setup a go dev environment -ENV GOPATH="${NON_ROOT_USER_HOME}/go" +# replicate these variables for the runtime user +ENV GOPATH="/usr/local/share/go" ENV GOBIN="${GOPATH}/bin" -ENV PATH="${PATH}:${GOBIN}" -# replicate the test-related bits generated by `kubebuilder` into its Makefile. -ENV ENVTEST_K8S_VERSION='1.23.x' -RUN go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest \ - && "${GOBIN}/setup-envtest" use "${ENVTEST_K8S_VERSION}" +# pin envtest at a specific k8s version and install it +ENV ENVTEST_K8S_VERSION="${ENVTEST_K8S_VERSION}" +RUN setup-envtest use "${ENVTEST_K8S_VERSION}" # disable downloading remote content henceforth ENV ENVTEST_INSTALLED_ONLY=true diff --git a/dockerfiles/kuttl/README.md b/dockerfiles/kuttl/README.md index ab66350..3da7e63 100644 --- a/dockerfiles/kuttl/README.md +++ b/dockerfiles/kuttl/README.md @@ -27,6 +27,11 @@ cluster. This image is used as the basis for both dev and test environments. Building the docker image locally is a little picky because there's lots of input arguments. These are managed via the [docker-bake.override.hcl](./docker-bake.override.hcl). +In order for this image to be usable as a test runner, the `USER` and `UID` in the image must +match the local system user. This is enabled by default for `USER`. 
However the `UID` must be read +from a command and docker bake doesn't support arbitrary shell execution. Instead, explicitly pass +this value to the build-arg via environment variable. + Start by creating a buildx context that supports (optionally) multi-platform images. If you've created this context previously, it's enough to ensure that it's active via `docker buildx ls`. @@ -41,7 +46,7 @@ docker buildx create \ Finally, build the image using: ```shell -docker buildx bake \ +NON_ROOT_USER_ID=$(id -u) docker buildx bake \ --file dockerfiles/kuttl/docker-bake.hcl \ --file dockerfiles/kuttl/docker-bake.override.hcl \ --pull \ @@ -59,13 +64,5 @@ The image is configured with `kuttl` as the entrypoint. docker container run --rm -it ${USER}/hbase/kustomize/kuttl:latest --help ``` -Running tests in the image requires mounting the workspace into the container image and passing -appropriate parameters to `kuttl`. For example, run the "small" tests like this: - -```shell -docker container run \ - --mount type=bind,source=$(pwd),target=/workspace \ - --workdir /workspace \ - ${USER}/hbase/kustomize/kuttl:latest \ - --config tests/kuttl-test-small.yaml -``` +You can use this image to run the unit and integration tests. See +[tests/README.md](../../tests/README.md) for details.
diff --git a/dockerfiles/kuttl/docker-bake.hcl b/dockerfiles/kuttl/docker-bake.hcl index dbb3695..5ca29fe 100644 --- a/dockerfiles/kuttl/docker-bake.hcl +++ b/dockerfiles/kuttl/docker-bake.hcl @@ -21,6 +21,12 @@ variable KUBECTL_VERSION { default = "1.24.10" } variable KUSTOMIZE_VERSION { default = "4.5.4" } variable KUTTL_VERSION { default = "0.15.0" } +variable ENVTEST_K8S_VERSION { default = "1.23.x" } + +variable USER {} +variable UID {} +variable NON_ROOT_USER { default = "${USER}" } +variable NON_ROOT_USER_ID { default = "${UID}" } variable KUBECTL_SHA_AMD64_URL {} variable KUBECTL_SHA_AMD64 {} @@ -44,7 +50,6 @@ variable KUSTOMIZE_BIN_ARM64_TGZ_URL {} variable KUSTOMIZE_BIN_ARM64_TGZ {} # output variables -variable USER {} variable IMAGE_NAME_REPOSITORY { default = "" } @@ -65,6 +70,9 @@ group default { target kuttl { dockerfile = "dockerfiles/kuttl/Dockerfile" args = { + ENVTEST_K8S_VERSION = ENVTEST_K8S_VERSION + NON_ROOT_USER = NON_ROOT_USER + NON_ROOT_USER_ID = NON_ROOT_USER_ID KUBECTL_SHA_AMD64_URL = KUBECTL_SHA_AMD64_URL KUBECTL_SHA_AMD64 = KUBECTL_SHA_AMD64 KUBECTL_BIN_AMD64_URL = KUBECTL_BIN_AMD64_URL diff --git a/overlays/hdfs/core-site.xml b/overlays/hdfs/core-site.xml new file mode 100644 index 0000000..782c61f --- /dev/null +++ b/overlays/hdfs/core-site.xml @@ -0,0 +1,96 @@ + + + + + fs.defaultFS + hdfs://${env.HADOOP_SERVICE} + + + fs.trash.interval + 10080 + + + fs.trash.checkpoint.interval + 10080 + + + ha.zookeeper.acl + world:anyone:rwcda + + + ha.zookeeper.auth + + + + ha.zookeeper.quorum + ${env.HA_ZOOKEEPER_QUORUM} + + + ha.zookeeper.parent-znode + / + + + hadoop.proxyuser.hdfs.hosts + * + + + hadoop.proxyuser.hdfs.users + * + + + hadoop.user.group.static.mapping.overrides + hdfs=supergroup;nobody=; + + + net.topology.script.file.name + /tmp/scripts/topology.sh + + + net.topology.script.number.args + 1 + + + hadoop.rpc.protection + authentication + + + hadoop.security.authorization + false + + + hadoop.ssl.client.conf + ssl-client.xml + + 
+ hadoop.ssl.enabled + false + + + hadoop.ssl.keystores.factory.class + org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory + + + hadoop.ssl.require.client.cert + false + + + hadoop.ssl.server.conf + ssl-server.xml + + diff --git a/overlays/hdfs/dn-service.yaml b/overlays/hdfs/dn-service.yaml new file mode 100644 index 0000000..4151527 --- /dev/null +++ b/overlays/hdfs/dn-service.yaml @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +kind: Service +apiVersion: v1 +metadata: + name: datanode + labels: + jmxexporter: enabled +spec: + selector: + role: datanode + clusterIP: None + ports: + - name: jmxexporter + port: 8000 diff --git a/overlays/hdfs/dn-statefulset.yaml b/overlays/hdfs/dn-statefulset.yaml new file mode 100644 index 0000000..b03640d --- /dev/null +++ b/overlays/hdfs/dn-statefulset.yaml @@ -0,0 +1,222 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: datanode +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + role: datanode + serviceName: hadoop + template: + metadata: + labels: + role: datanode + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + role: datanode + topologyKey: kubernetes.io/hostname + containers: + - image: hadoop + name: datanode + command: + - /bin/bash + - -c + - |- + # Shell context so we can pull in the environment variables set in the container and + # via the env and envFrom. + # See https://stackoverflow.com/questions/57885828/netty-cannot-access-class-jdk-internal-misc-unsafe + HADOOP_LOGFILE="hdfs-${HOSTNAME}.log" \ + HDFS_DATANODE_OPTS=" \ + -XX:MaxRAMPercentage=${JVM_HEAP_PERCENTAGE_OF_RESOURCE_LIMIT} \ + -XX:InitialRAMPercentage=${JVM_HEAP_PERCENTAGE_OF_RESOURCE_LIMIT} \ + -Djava.security.properties=/tmp/scratch/java.security \ + -javaagent:${JMX_PROMETHEUS_JAR}=8000:/tmp/scratch/jmxexporter.yaml \ + -Djava.library.path=${HADOOP_HOME}/lib/native \ + --add-opens java.base/jdk.internal.misc=ALL-UNNAMED \ + -Dio.netty.tryReflectionSetAccessible=true \ + -Xlog:gc:/var/log/hadoop/gc.log:time,uptime:filecount=10,filesize=100M" \ + hdfs datanode + # For now, just fetch local /jmx + # Says kubelet only exposes failures, not success: https://stackoverflow.com/questions/34455040/kubernetes-liveness-probe-logging + # Do better. Check this DN successfully registered w/ NN. TODO. 
+ livenessProbe: + httpGet: + path: /jmx?qry=java.lang:type=OperatingSystem + # 9865 if HTTPS + port: 9864 + initialDelaySeconds: 10 + periodSeconds: 10 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /jmx?qry=java.lang:type=OperatingSystem + # 9865 if HTTPS + port: 9864 + initialDelaySeconds: 10 + periodSeconds: 10 + failureThreshold: 3 + startupProbe: + httpGet: + path: /jmx?qry=java.lang:type=OperatingSystem + # 9865 if HTTPS + port: 9864 + initialDelaySeconds: 10 + failureThreshold: 30 + periodSeconds: 10 + resources: + requests: + cpu: '0.2' + memory: 1Gi + limits: + cpu: '1.0' + memory: 1.5Gi + envFrom: + - configMapRef: + name: environment + env: + # The 'node' this container is running on, not hdfs namenode. + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + ports: + - name: http + containerPort: 9864 + - name: https + containerPort: 9865 + - name: data + containerPort: 9866 + - name: ipc + containerPort: 9867 + - name: jmx + containerPort: 9864 + volumeMounts: + - mountPath: /etc/hadoop + name: hadoop-configuration + - mountPath: /var/log/hadoop + name: hadoop-logs + - mountPath: /tmp/scratch + name: scratch + - mountPath: /tmp/scripts + name: scripts + - mountPath: /data00 + name: data00 + initContainers: + - image: hadoop + name: bootstrapper + command: + - /bin/bash + - -c + - |- + set -xe + mkdir -p ${HADOOP_LOG_DIR} || echo $? + chown -R ${USER} ${HADOOP_LOG_DIR} + # If format-hdfs configmap present, format. + ! /tmp/scripts/exists_configmap.sh format-hdfs || ( + for dir in $( echo "${DATANODE_DATA_DIR}" | tr ',' '\n') + do + rm -rf ${dir} + done + ) + for dir in $( echo "${DATANODE_DATA_DIR}" | tr ',' '\n') + do + mkdir -p ${dir} || : + chown -R ${USER} ${dir} + done + df -h + cp /tmp/global-files/* /tmp/scratch/ + # Wait for the nns to come up. 
+ /tmp/scripts/jmxping.sh namenode ${HADOOP_SERVICE} + securityContext: + # Run bootstrapper as root so can set ${USER} owner on data volume + allowPrivilegeEscalation: false + runAsUser: 0 + resources: + requests: + cpu: '0.2' + memory: 256Mi + limits: + cpu: '0.5' + memory: 512Mi + envFrom: + - configMapRef: + name: environment + env: + # Used by scripts that run during bootstrap + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + volumeMounts: + - mountPath: /data00 + name: data00 + - mountPath: /tmp/scripts + name: scripts + # Scratch dir is a location where init containers place items for later use + # by the main containers when they run. + - mountPath: /tmp/scratch + name: scratch + - mountPath: /tmp/global-files + name: global-files + serviceAccountName: hadoop + volumes: + - configMap: + name: hadoop-configuration + name: hadoop-configuration + - configMap: + name: scripts + defaultMode: 0555 + name: scripts + - configMap: + name: global-files + name: global-files + - emptyDir: {} + name: hadoop-logs + # Scratch dir is a location where init containers place items for later use + # by the main containers when they run. 
+ - emptyDir: {} + name: scratch + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - metadata: + name: data00 + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 10Gi diff --git a/overlays/hdfs/hdfs-site.xml b/overlays/hdfs/hdfs-site.xml new file mode 100644 index 0000000..2ae4249 --- /dev/null +++ b/overlays/hdfs/hdfs-site.xml @@ -0,0 +1,274 @@ + + + + + dfs.block.replicator.classname + org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyRackFaultTolerant + + + dfs.blocksize + 64m + + + dfs.datanode.address + 0.0.0.0:9866 + + + dfs.datanode.balance.bandwidthPerSec + 20m + + + dfs.datanode.balance.max.concurrent.moves + 100 + + + dfs.datanode.data.dir + ${env.DATANODE_DATA_DIR} + + + dfs.datanode.failed.volumes.tolerated + 0 + + + dfs.datanode.du.reserved + 1073741824 + + + dfs.datanode.fileio.profiling.sampling.percentage + 10 + + + dfs.datanode.http.address + 0.0.0.0:9864 + + + dfs.datanode.https.address + 0.0.0.0:9865 + + + dfs.datanode.ipc.address + 0.0.0.0:9867 + + + dfs.datanode.max.locked.memory + 0 + + + dfs.datanode.peer.stats.enabled + true + + + dfs.encrypt.data.transfer + false + + + dfs.encrypt.data.transfer.algorithm + rc4 + + + dfs.ha.automatic-failover.enabled + true + + + dfs.ha.fencing.methods + shell(/usr/bin/true) + + + dfs.journalnode.edits.dir + ${env.JOURNALNODE_DATA_DIR} + + + dfs.journalnode.http-address + 0.0.0.0:8480 + + + dfs.journalnode.https-address + 0.0.0.0:8481 + + + dfs.journalnode.rpc-address + 0.0.0.0:8485 + + + dfs.namenode.handler.count + 64 + + + + + dfs.namenode.http-bind-host + 0.0.0.0 + + + dfs.namenode.https-bind-host + 0.0.0.0 + + + dfs.namenode.name.dir + ${env.NAMENODE_DATA_DIR} + + + dfs.namenode.replication.max-streams + 20 + + + dfs.namenode.replication.max-streams-hard-limit + 40 + + + dfs.namenode.replication.min + ${env.DFS_REPLICATION} + + + dfs.namenode.replication.work.multiplier.per.iteration + 10 + + + dfs.namenode.safemode.threshold-pct + 0.9 + + + 
dfs.namenode.service.handler.count + 64 + + + dfs.nameservices + ${env.HADOOP_SERVICE} + + + dfs.reformat.disabled + false + + + dfs.replication + ${env.DFS_REPLICATION} + + + dfs.replication.max + 512 + + + ipc.8020.callqueue.impl + org.apache.hadoop.ipc.FairCallQueue + + + ipc.8020.scheduler.impl + org.apache.hadoop.ipc.DecayRpcScheduler + + + zk-dt-secret-manager.zkAuthType + digest + + + zk-dt-secret-manager.digest.auth + @/etc/hadoop/zookeeper/auth/zk-auth.txt + + + zk-dt-secret-manager.zkConnectionString + TODO + + + zk-dt-secret-manager.znodeWorkingPath + TODO + + + dfs.client.failover.proxy.provider.hadoop + org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider + + + + dfs.ha.namenodes.hadoop + namenode-0 + + + dfs.namenode.http-address.hadoop.namenode-0 + namenode-0.${env.HADOOP_SERVICE}.${env.POD_NAMESPACE}.${env.DOMAIN}:9870 + + + dfs.namenode.https-address.hadoop.namenode-0 + namenode-0.${env.HADOOP_SERVICE}.${env.POD_NAMESPACE}.${env.DOMAIN}:9871 + + + dfs.namenode.rpc-address.hadoop.namenode-0 + namenode-0.${env.HADOOP_SERVICE}.${env.POD_NAMESPACE}.${env.DOMAIN}:8020 + + + dfs.namenode.servicerpc-address.hadoop.namenode-0 + namenode-0.${env.HADOOP_SERVICE}.${env.POD_NAMESPACE}.${env.DOMAIN}:8022 + + + dfs.namenode.lifeline.rpc-address.hadoop.namenode-0 + namenode-0.${env.HADOOP_SERVICE}.${env.POD_NAMESPACE}.${env.DOMAIN}:8050 + + + dfs.client.https.keystore.resource + ssl-client.xml + + + dfs.client.https.need-auth + false + + + dfs.http.policy + ${env.HTTP_POLICY} + + + dfs.https.enable + ${env.DFS_HTTPS_ENABLE} + + + dfs.https.server.keystore.resource + ssl-server.xml + + + dfs.namenode.acls.enabled + true + + + dfs.datanode.use.datanode.hostname + true + + + dfs.client.use.datanode.hostname + true + + + + dfs.namenode.datanode.registration.ip-hostname-check + false + + + dfs.blockreport.intervalMsec + 900000 + Determines block reporting interval in milliseconds. 
+ Report frequently else around recovery storms, the NN gets convinced + there is no block space left because of 'scheduled space' reserved. + + + diff --git a/overlays/hdfs/kustomization.yaml b/overlays/hdfs/kustomization.yaml new file mode 100644 index 0000000..6bd3abe --- /dev/null +++ b/overlays/hdfs/kustomization.yaml @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +commonLabels: + app: hadoop + +configMapGenerator: +- name: hadoop-configuration + # Add in single-instance namenode and datanode hdfs-site and core-site. + behavior: merge + files: + - hdfs-site.xml + - core-site.xml + +resources: +- nn-statefulset.yaml +- nn-service.yaml +- dn-statefulset.yaml +- dn-service.yaml +- ../../base diff --git a/overlays/hdfs/nn-service.yaml b/overlays/hdfs/nn-service.yaml new file mode 100644 index 0000000..a46b448 --- /dev/null +++ b/overlays/hdfs/nn-service.yaml @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +kind: Service +apiVersion: v1 +metadata: + name: namenode + labels: + jmxexporter: enabled +spec: + selector: + role: namenode + clusterIP: None + ports: + - name: rpc + port: 8020 + protocol: TCP + targetPort: 8020 + - name: http + port: 9870 + protocol: TCP + targetPort: 9870 + - name: jmxexporter + port: 8000 diff --git a/overlays/hdfs/nn-statefulset.yaml b/overlays/hdfs/nn-statefulset.yaml new file mode 100644 index 0000000..789e464 --- /dev/null +++ b/overlays/hdfs/nn-statefulset.yaml @@ -0,0 +1,326 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: namenode +spec: + minAvailable: 1 + selector: + matchLabels: + role: namenode +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: namenode +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + role: namenode + serviceName: hadoop + template: + metadata: + labels: + role: namenode + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: role + operator: In + values: + - namenode + topologyKey: kubernetes.io/hostname + containers: + - image: hadoop + name: namenode + imagePullPolicy: IfNotPresent + command: + - /bin/bash + - -c + - |- + # Shell context so we can pull in the environment variables set in the container and + # via the env and envFrom. + # See https://stackoverflow.com/questions/57885828/netty-cannot-access-class-jdk-internal-misc-unsafe + HADOOP_LOGFILE="hdfs-${HOSTNAME}.log" \ + HDFS_NAMENODE_OPTS=" \ + -XX:MaxRAMPercentage=${JVM_HEAP_PERCENTAGE_OF_RESOURCE_LIMIT} \ + -XX:InitialRAMPercentage=${JVM_HEAP_PERCENTAGE_OF_RESOURCE_LIMIT} \ + -Djava.security.properties=/tmp/scratch/java.security \ + -javaagent:${JMX_PROMETHEUS_JAR}=8000:/tmp/scratch/jmxexporter.yaml \ + -Djava.library.path=${HADOOP_HOME}/lib/native \ + --add-opens java.base/jdk.internal.misc=ALL-UNNAMED \ + -Dio.netty.tryReflectionSetAccessible=true \ + -Xlog:gc:/var/log/hadoop/gc.log:time,uptime:filecount=10,filesize=100M" \ + hdfs namenode + # For now, just fetch local /jmx + # Says kubelet only exposes failures, not success: https://stackoverflow.com/questions/34455040/kubernetes-liveness-probe-logging + livenessProbe: + httpGet: + path: /jmx?qry=java.lang:type=OperatingSystem + # 9871 if HTTPS + port: 9870 + initialDelaySeconds: 1 + failureThreshold: 6 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /jmx?qry=java.lang:type=OperatingSystem + # 9871 if HTTPS + port: 9870 + 
initialDelaySeconds: 10 + failureThreshold: 3 + periodSeconds: 10 + startupProbe: + httpGet: + path: /jmx?qry=java.lang:type=OperatingSystem + # 9871 if HTTPS + port: 9870 + initialDelaySeconds: 10 + failureThreshold: 30 + periodSeconds: 10 + resources: + requests: + cpu: '0.4' + memory: 2Gi + limits: + cpu: '1' + memory: 3Gi + envFrom: + - configMapRef: + name: environment + env: + # The 'node' this container is running on, not hdfs namenode. + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + ports: + - name: http + containerPort: 9870 + - name: https + containerPort: 9871 + - name: jmx + containerPort: 9870 + - name: rpc + containerPort: 8020 + - name: servicerpc + containerPort: 8022 + - name: lifelinerpc + containerPort: 8050 + volumeMounts: + - mountPath: /etc/hadoop + name: hadoop-configuration + - mountPath: /var/log/hadoop + name: hadoop-logs + - mountPath: /tmp/scratch + name: scratch + - mountPath: /tmp/scripts + name: scripts + - mountPath: /data00 + name: data00 + initContainers: + - image: hadoop + name: bootstrapper + imagePullPolicy: IfNotPresent + command: + # This container is running as root so can set permissions. + - /bin/bash + - -c + - |- + set -xe + if [ -n "${QJOURNAL}" ]; then + # If QJOURNAL, then HA and journalnodes are in the mix. Wait on them to come up. + /tmp/scripts/jmxping.sh journalnode ${HADOOP_SERVICE} + fi + # Copy over the files under global-files so in place for the runtime container. + cp /tmp/global-files/* /tmp/scratch/ + # Set perms + chown -R ${USER} ${HADOOP_LOG_DIR} + # If format-hdfs configmap present, format. + find ${NAMENODE_DATA_DIR} || : + ! 
/tmp/scripts/exists_configmap.sh format-hdfs || ( + rm -rf ${NAMENODE_DATA_DIR} + ) + chmod 777 /data00 + securityContext: + # Run bootstrapper as root so can set ${USER} owner on data volume + allowPrivilegeEscalation: false + runAsUser: 0 + resources: + requests: + cpu: '0.2' + memory: 256Mi + limits: + cpu: '0.5' + memory: 512Mi + envFrom: + - configMapRef: + name: environment + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + volumeMounts: + - mountPath: /etc/hadoop + name: hadoop-configuration + - mountPath: /var/log/hadoop + name: hadoop-logs + - mountPath: /data00 + name: data00 + - mountPath: /etc/hadoop/zookeeper/auth + name: zookeeper-credentials + readOnly: true + - mountPath: /tmp/scripts + name: scripts + # Scratch dir is a location where init containers place items for later use + # by the main containers when they run. + - mountPath: /tmp/scratch + name: scratch + - mountPath: /tmp/global-files + name: global-files + - image: hadoop + name: format-hdfs + imagePullPolicy: IfNotPresent + command: + # Runs as the image/hdfs user. + - /bin/bash + - -c + - |- + set -xe + find /data00 || echo $? + # Run format if no nn dir. + if [ ! -d "${NAMENODE_DATA_DIR}" ]; then + ordinal=$(echo $POD_NAME | sed -e 's/^[^-]*-\(.*\)/\1/') + case $ordinal in + 0) + hdfs namenode -format -nonInteractive || ( + # Perhaps another nn is active? If so, we should do bootstrap here instead. 
+ hdfs namenode -bootstrapStandby -nonInteractive + ) + ;; + *) + hdfs namenode -bootstrapStandby -nonInteractive + ;; + esac + fi + resources: + requests: + cpu: '0.2' + memory: 256Mi + limits: + cpu: '0.5' + memory: 512Mi + envFrom: + - configMapRef: + name: environment + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + volumeMounts: + - mountPath: /etc/hadoop + name: hadoop-configuration + - mountPath: /var/log/hadoop + name: hadoop-logs + - mountPath: /data00 + name: data00 + - mountPath: /etc/hadoop/zookeeper/auth + name: zookeeper-credentials + readOnly: true + - mountPath: /tmp/scripts + name: scripts + # Scratch dir is a location where init containers place items for later use + # by the main containers when they run. + - mountPath: /tmp/scratch + name: scratch + serviceAccountName: hadoop + volumes: + - configMap: + name: hadoop-configuration + name: hadoop-configuration + - configMap: + name: scripts + defaultMode: 0555 + name: scripts + - configMap: + name: global-files + name: global-files + - emptyDir: {} + name: hadoop-logs + # Scratch dir is a location where init containers place items for later use + # by the main containers when they run. + - emptyDir: {} + name: scratch + - secret: + secretName: zookeeper-credentials + defaultMode: 0400 # octal: owner read-only + optional: true + name: zookeeper-credentials + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - metadata: + name: data00 + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 2Gi diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..a74ea0c --- /dev/null +++ b/tests/README.md @@ -0,0 +1,204 @@ + + +# Tests + +Kustomization tests are implemented using the [Kuttl](https://kuttl.dev/) testing harness.
Tests +are divided into two groups: unit and integration. + +The unit test environment is light-weight and built on Kuttl's [Mocked Control +Plane](https://kuttl.dev/docs/testing/test-environments.html#mocked-control-plane) feature. The +tests themselves are limited to assertions based on the structure of resource requests made to the +kube API server. + +Integration tests can also be considered end-to-end tests. They rely on a "real", multi-node +kubernetes environment, be it a small [KinD](https://kind.sigs.k8s.io/) or +[MiniKube](https://minikube.sigs.k8s.io/) deployment or a fully distributed Kubernetes +cluster. Integration tests expect to see the results of the requests made against the API server +and make assertions of the presence of running Hadoop application processes. As such, the +integration testing environment must provide compute resources sufficient to run a small +HBase cluster. + +In both cases, tests are run using Kuttl. While you can install kuttl locally, it is recommended +that you use the `kuttl` container image provided by this project. It is described in +[dockerfiles/kuttl/README.md](../dockerfiles/kuttl/README.md). The following examples assume that +you have built the image locally. + +## Unit Tests + +Running tests in the `kuttl` image requires mounting the workspace into the container runtime and +passing appropriate parameters to `kuttl`. Run the unit tests like this: + +```shell +% docker container run \ + --mount type=bind,source=$(pwd),target=/workspace \ + --workdir /workspace \ + ${USER}/hbase/kustomize/kuttl:latest \ + --config tests/kuttl-test-unit.yaml +=== RUN kuttl + harness.go:462: starting setup + harness.go:246: running tests with a mocked control plane (kube-apiserver and etcd). +... 
+--- PASS: kuttl (5.81s) + --- PASS: kuttl/harness (0.00s) + --- PASS: kuttl/harness/base (1.35s) + --- PASS: kuttl/harness/overlays_hdfs (1.40s) +PASS +``` + +## Integration Tests + +This directory defines a set of tests that are suitable for running against a target cluster -- +they are not too resource intensive and do not require any vendor-specific extensions. It should +be possible to run these tests against a multi-node KinD cluster; below are some notes to help a +developer to run them locally. + +### Run the tests locally + +Assumes a Docker Desktop or some other docker-in-docker type of environment. First, prepare your +cluster connection details such that they can be passed into the container context. Next, launch +the test runner in a container: + +```shell +$ docker container run \ + --env KUBECONFIG=/workspace/your-kubeconfig \ + --mount type=bind,source=$(pwd),target=/workspace \ + -v /var/run/docker.sock:/var/run/docker.sock \ + --workdir /workspace \ + ${USER}/hbase/kustomize/kuttl:latest \ + --config tests/kuttl-test-integration.yaml \ + --parallel 1 +``` + +### Run the tests in AWS EKS + +It is possible to run these tests in AWS EKS. This requires configuring an RBAC on your target +cluster that maps to an IAM profile. Next, define a profile in AWS configuration. When you launch +the container, pass configuration and profile selection through to the running container. + +Building on the previous example, + +```shell +$ docker container run \ + --env AWS_PROFILE="your-profile" \ + --env KUBECONFIG=/workspace/your-kubeconfig \ + --mount type=bind,source=$(pwd),target=/workspace \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v ~/.aws:/root/.aws \ + --workdir /workspace \ + ${USER}/hbase/kustomize/kuttl:latest \ + --config tests/kuttl-test-integration.yaml +``` + +### Prepare a KinD cluster + +Ask KinD to create a cluster (and docker network), and export the configuration oriented as from +inside the cluster.
Start by creating a kind-config.yaml and configuring it for multiple nodes. +See https://kind.sigs.k8s.io/docs/user/quick-start/#configuring-your-kind-cluster + +```shell +$ kind create cluster --config kind-config.yaml +... +You can now use your cluster with: + +kubectl cluster-info --context kind --kubeconfig kubeconfig +$ kind export kubeconfig --name kind --internal --kubeconfig kubeconfig-internal +``` + +### Local KinD Hacks + +Preparing and staging the large container images into the kind nodes is slow. Speed up the process +a bit by creating a single-node KinD cluster and letting `kuttl` populate the images you need. + +First, find all the images used in your tests, + +```shell +$ find tests/kind -type f -iname '*kustomization.yaml' \ + -exec yq '.images[] | .newName + ":" + .newTag' {} + \ + | sort -u +hadoop:... +hbase:... +zookeeper:... +``` + +Pull those images locally. + +```shell +$ docker image pull hadoop:... +$ docker image pull hbase:... +$ docker image pull zookeeper:... +``` + +Now make sure kuttl is using a docker volume for the containerd directory on each container, and +populate those images into your kuttl configuration using this config snippet: + +```yaml +kindNodeCache: + # Have kuttl create and mount volumes for a container image cache to each kind pod. Kuttl will + # reuse these mounts across runs, so we can save time the next time the tests run. + true +kindContainers: + # pre-populate the kind containers with these images pulled from the host registry. They'll be + # cached via `kindNodeCache`. +- hadoop... +- hbase... +- zookeeper:... +``` + +When you run `kuttl` with this config, you'll see that it has mounted a volume for each container. +It'll take a while, but `kuttl` will report its progress copying these container images. + +``` +== RUN kuttl +... + harness.go:202: node mount point /var/lib/docker/volumes/kind-0/_data +... + harness.go:155: Starting KIND cluster + kind.go:66: Adding Containers to KIND...
+ kind.go:75: Add image zookeeper:... to node control-plane +... +``` + +Once copied into one volume, create all the additional volumes you'll need and clone the original. +Repeat this for every worker node you'd like in your cluster. + +```shell +$ docker volume create --name kind-1 +$ docker container run --rm -it \ + -v kind-0:/from \ + -v kind-1:/to \ + alpine ash -c "cd /from ; cp -a . /to" +``` + +In `kind-config.yaml`, specify the mount points for each of your KinD processes. + +```yaml +nodes: +- role: control-plane + extraMounts: + - &extra-mounts + hostPath: /var/lib/docker/volumes/kind-0/_data + containerPath: /var/lib/containerd + readOnly: false + propagation: HostToContainer +- role: worker + extraMounts: + - <<: *extra-mounts + hostPath: /var/lib/docker/volumes/kind-1/_data +... +``` diff --git a/tests/bin/kustomize_into_tmpdir.sh b/tests/bin/kustomize_into_tmpdir.sh new file mode 100755 index 0000000..7e5cef1 --- /dev/null +++ b/tests/bin/kustomize_into_tmpdir.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Test runner using KUTTL against a target cluster. +# https://kuttl.dev +# https://kind.sigs.k8s.io +# +# Materialize a kustomize directory for a kuttl test. 
+# +# Kustomize is clunky for automated testing. It's pretty opinionated in that it will only evaluate +# a directory off of disk -- you cannot generate a kustomization and pass it in via stdin. +# In order to use kuttl generated namespaces within the kustomization, we have to modify the +# kustomization.yaml before applying it. If we modify that file in the source tree, we end up with +# the test namespace appended to the file under source control. So, this script creates a temp +# directory, copies all the resources into that directory, and modifies the kustomization.yaml as +# necessary. It then runs `kubectl apply -k` against that temporary directory. +# + +declare DEBUG="${DEBUG:-false}" +if [ "${DEBUG}" = 'true' ] ; then + set -x +fi + +set -eou pipefail + +declare NAMESPACE +declare NEW_RESOURCES='[]' +declare NEW_COMPONENTS='[]' +declare kustomize_dir +declare -a rewritten_resources=() +declare -a rewritten_components=() + +kustomize_dir="$(mktemp -d -p /tmp "${NAMESPACE}.XXXXXXXXXX")" +trap '[ -d "${kustomize_dir}" ] && rm -rf "${kustomize_dir}"' EXIT + +cp -r ./* "${kustomize_dir}/" + +for r in $(yq '.resources[]' kustomization.yaml) ; do + if [[ "${r}" =~ ^\.\./.* ]] ; then + # resolve the new relative location for any resource path that is not in the local directory + canonized="$(cd "${r}" ; pwd)" + r="../..${canonized}" + fi + rewritten_resources+=("'${r}'") +done +if [ "${#rewritten_resources[@]}" -gt 0 ] ; then + NEW_RESOURCES="[ $(printf '%s,' "${rewritten_resources[@]}") ]" +fi + +for r in $(yq '.components[]' kustomization.yaml) ; do + if [[ "${r}" =~ ^\.\./.* ]] ; then + # resolve the new relative location for any component path that is not in the local directory + canonized="$(cd "${r}" ; pwd)" + r="../..${canonized}" + fi + rewritten_components+=("'${r}'") +done +if [ "${#rewritten_components[@]}" -gt 0 ] ; then + NEW_COMPONENTS="[ $(printf '%s,' "${rewritten_components[@]}") ]" +fi + +env NAMESPACE="${NAMESPACE}" \ +
NEW_RESOURCES="${NEW_RESOURCES}" \ + NEW_COMPONENTS="${NEW_COMPONENTS}" \ + yq -i ' + .namespace = strenv(NAMESPACE) | + .resources = env(NEW_RESOURCES) | + .components = env(NEW_COMPONENTS) +' "${kustomize_dir}/kustomization.yaml" + +if [ "${DEBUG}" = 'true' ] ; then + cat "${kustomize_dir}/kustomization.yaml" +fi + +kubectl apply -k "${kustomize_dir}" diff --git a/tests/integration/overlays_hdfs/00-assert.yaml b/tests/integration/overlays_hdfs/00-assert.yaml new file mode 100644 index 0000000..899c485 --- /dev/null +++ b/tests/integration/overlays_hdfs/00-assert.yaml @@ -0,0 +1,31 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +# assert that there is a `StatefulSet` named "namenode" that has one live instance +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: namenode +status: + availableReplicas: 1 +--- +# assert that there is a `StatefulSet` named "datanode" that has one live instance +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: datanode +status: + availableReplicas: 1 diff --git a/tests/integration/overlays_hdfs/00-kustomize.yaml b/tests/integration/overlays_hdfs/00-kustomize.yaml new file mode 100644 index 0000000..3947f48 --- /dev/null +++ b/tests/integration/overlays_hdfs/00-kustomize.yaml @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: +- script: ../../bin/kustomize_into_tmpdir.sh diff --git a/tests/integration/overlays_hdfs/kustomization.yaml b/tests/integration/overlays_hdfs/kustomization.yaml new file mode 100644 index 0000000..ba9a7bf --- /dev/null +++ b/tests/integration/overlays_hdfs/kustomization.yaml @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: +- ../test_base +- ../../../overlays/hdfs diff --git a/tests/integration/test_base/kustomization.yaml b/tests/integration/test_base/kustomization.yaml new file mode 100644 index 0000000..387cf31 --- /dev/null +++ b/tests/integration/test_base/kustomization.yaml @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +commonLabels: + # Must repeat common labels and images in each overlay; can't inherit to keep each overlay independent + # https://github.com/kubernetes-sigs/kustomize/issues/915 + # This label is used to open up calico network acls + app: hadoop + +resources: +# When inter-pod networking is limited, apply this policy to open communications between pods that +# bear the "hadoop" label. +- networkpolicy.yaml diff --git a/tests/integration/test_base/networkpolicy.yaml b/tests/integration/test_base/networkpolicy.yaml new file mode 100644 index 0000000..8da0c53 --- /dev/null +++ b/tests/integration/test_base/networkpolicy.yaml @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +# Explicitly permit all traffic between Hadoop-related pods in our namespace +kind: NetworkPolicy +apiVersion: networking.k8s.io/v1 +metadata: + name: allow-all +spec: + podSelector: + matchLabels: + app: hadoop + ingress: + - {} + egress: + - {} diff --git a/tests/kuttl-test-integration.yaml b/tests/kuttl-test-integration.yaml new file mode 100644 index 0000000..3329cdf --- /dev/null +++ b/tests/kuttl-test-integration.yaml @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Test runner using KUTTL against a target cluster. +# https://kuttl.dev +# https://kind.sigs.k8s.io +# +# Test runner using KUTTL against a target cluster. +# https://kuttl.dev +# https://kind.sigs.k8s.io +--- +# Does not use Kuttl's built-in KIND support -- it doesn't quite work correctly with a VM-based +# (Docker Desktop) style of runtime. Instead, assumes the cluster is established outside of kuttl +# and configuration is provided via `--env`. +apiVersion: kuttl.dev/v1beta1 +kind: TestSuite +testDirs: +- ./tests/integration +timeout: + # these tests allocate several pods with dependencies between them, allow some time for + # everything to launch and settle.
+ 300 +reportName: TEST-kuttl-report-integration +reportFormat: xml diff --git a/tests/kuttl-test-unit.yaml b/tests/kuttl-test-unit.yaml new file mode 100644 index 0000000..db0ade0 --- /dev/null +++ b/tests/kuttl-test-unit.yaml @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Test runner using KUTTL against a target cluster. +# https://kuttl.dev +# https://kind.sigs.k8s.io +# Test runner using https://kuttl.dev +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestSuite +startControlPlane: true +testDirs: +- ./tests/unit +reportName: TEST-kuttl-report-unit +reportFormat: xml diff --git a/tests/unit/base/00-assert.yaml b/tests/unit/base/00-assert.yaml new file mode 100644 index 0000000..1e24de4 --- /dev/null +++ b/tests/unit/base/00-assert.yaml @@ -0,0 +1,72 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +# assert that there is a `ConfigMap` named "environment" +apiVersion: v1 +kind: ConfigMap +metadata: + name: environment +--- +# assert that there is a `ConfigMap` named "global-files" +apiVersion: v1 +kind: ConfigMap +metadata: + name: global-files +# TODO: kuttl has no means to express `any` value, so cannot assert on data keys. +#data: +# java.security: ... +# jmxexporter.yaml: ... +# ssl-client.xml: ... +# ssl-server.xml: ... +--- +# assert that there is a `ConfigMap` named "hadoop-configuration-XXX" +# TODO: kuttl does not support generated names +#apiVersion: v1 +#kind: ConfigMap +#metadata: +# name: hadoop-configuration-c94h8k249d +# TODO: kuttl has no means to express `any` value, so cannot assert on data keys. +#data: +# log4j.properties: ... +--- +# assert that there is a `ConfigMap` named "scripts" +apiVersion: v1 +kind: ConfigMap +metadata: + name: scripts +# TODO: kuttl has no means to express `any` value, so cannot assert on data keys. +#data: +# apiserver_access.sh: ... +# ... 
+--- +# assert that there is a `Secret` named "keystore-password" +apiVersion: v1 +kind: Secret +metadata: + name: keystore-password +type: Opaque +--- +# assert that there is a `Service` named "hadoop" +apiVersion: v1 +kind: Service +metadata: + name: hadoop +--- +# assert that there is a `Job` named "delete-format-hdfs-configmap" +apiVersion: batch/v1 +kind: Job +metadata: + name: delete-format-hdfs-configmap diff --git a/tests/unit/base/00-kustomize.yaml b/tests/unit/base/00-kustomize.yaml new file mode 100644 index 0000000..3947f48 --- /dev/null +++ b/tests/unit/base/00-kustomize.yaml @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: +- script: ../../bin/kustomize_into_tmpdir.sh diff --git a/tests/unit/base/README.md b/tests/unit/base/README.md new file mode 100644 index 0000000..b4839d4 --- /dev/null +++ b/tests/unit/base/README.md @@ -0,0 +1,24 @@ + + +# tests/unit/base + +A collection of asserts on the resources allocated by `base` that are not explicitly covered by a +more specific test case. + +Hopefully the scope of this test case shrinks over time.
diff --git a/tests/unit/base/kustomization.yaml b/tests/unit/base/kustomization.yaml new file mode 100644 index 0000000..e0ed1f4 --- /dev/null +++ b/tests/unit/base/kustomization.yaml @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ../../../base diff --git a/tests/unit/overlays_hdfs/00-assert.yaml b/tests/unit/overlays_hdfs/00-assert.yaml new file mode 100644 index 0000000..cf36710 --- /dev/null +++ b/tests/unit/overlays_hdfs/00-assert.yaml @@ -0,0 +1,73 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +--- +# assert that there is a `ConfigMap` named "hadoop-configuration-XXX" +# TODO: kuttl does not support generated names +#apiVersion: v1 +#kind: ConfigMap +#metadata: +# name: hadoop-configuration-c94h8k249d +# TODO: kuttl has no means to express `any` value, so cannot assert on data keys. +#data: +# log4j.properties: ... +# hdfs-site.xml: ... +# core-site.xml: ... +--- +# assert that there is a `PodDisruptionBudget` named "namenode" +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: namenode +--- +# assert that there is a `StatefulSet` named "namenode" that provides pods labeled role:namenode +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: namenode +spec: + template: + metadata: + labels: + role: namenode +--- +# assert that there is a `Service` named "namenode" pointing to pods labeled role:namenode +apiVersion: v1 +kind: Service +metadata: + name: namenode +spec: + selector: + role: namenode +--- +# assert that there is a `StatefulSet` named "datanode" that provides pods labeled role:datanode +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: datanode +spec: + template: + metadata: + labels: + role: datanode +--- +# assert that there is a `Service` named "datanode" pointing to pods labeled role:datanode +apiVersion: v1 +kind: Service +metadata: + name: datanode +spec: + selector: + role: datanode diff --git a/tests/unit/overlays_hdfs/00-kustomize.yaml b/tests/unit/overlays_hdfs/00-kustomize.yaml new file mode 100644 index 0000000..3947f48 --- /dev/null +++ b/tests/unit/overlays_hdfs/00-kustomize.yaml @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: +- script: ../../bin/kustomize_into_tmpdir.sh diff --git a/tests/unit/overlays_hdfs/kustomization.yaml b/tests/unit/overlays_hdfs/kustomization.yaml new file mode 100644 index 0000000..cd111fc --- /dev/null +++ b/tests/unit/overlays_hdfs/kustomization.yaml @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: +- ../../../overlays/hdfs From 23008bd108da5a6cc92af3d876a429163683ae24 Mon Sep 17 00:00:00 2001 From: Nick Dimiduk Date: Sat, 13 Jan 2024 17:14:38 +0100 Subject: [PATCH 2/2] address (most) pre-commit nits --- .gitignore | 2 + .yamllint.yaml | 2 +- base/environment-configmap.yaml | 7 ++-- base/kustomization.yaml | 2 +- base/rbac.yaml | 3 +- .../jenkins/jenkins_precommit_github_yetus.sh | 3 +- dockerfiles/kuttl/README.md | 2 +- overlays/hdfs/dn-statefulset.yaml | 39 ++++++++++--------- overlays/hdfs/nn-statefulset.yaml | 38 +++++++++--------- tests/README.md | 13 ++++--- .../integration/test_base/kustomization.yaml | 3 +- 11 files changed, 61 insertions(+), 53 deletions(-) diff --git a/.gitignore b/.gitignore index 5142412..f6255fe 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +# test-patch output +output # kuttl detritus TEST-kuttl-report-unit.xml diff --git a/.yamllint.yaml b/.yamllint.yaml index 5d92049..5e7a915 100644 --- a/.yamllint.yaml +++ b/.yamllint.yaml @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. --- -extends: default +extends: relaxed rules: line-length: diff --git a/base/environment-configmap.yaml b/base/environment-configmap.yaml index d018c22..71155f1 100644 --- a/base/environment-configmap.yaml +++ b/base/environment-configmap.yaml @@ -16,7 +16,7 @@ # # Common environment variables shared across pods. # Include w/ the 'envFrom:' directive. -# We have to be pendantic in here. We cannot have a value +# We have to be pedantic in here. We cannot have a value # refer to a define made earlier; the interpolation # doesn't work. --- @@ -32,12 +32,13 @@ data: # See https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id # The headless-service is defined in the adjacent rbac.yaml. # Matches the serviceName we have on our statefulsets. 
- # Required that we create it according to https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#limitations + # Required that we create it according to + # https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#limitations HADOOP_SERVICE: hadoop # dfs.http.policy # If HTTPS_ONLY or HTTPS_OR_HTTP then we'll depend on https in UI and jmx'ing # and will adjust schema and ports accordingly. If https, we need to get certificates - # so cert-manager, etc., needs to be instaled. + # so cert-manager, etc., needs to be installed. HTTP_POLICY: HTTP_ONLY DFS_HTTPS_ENABLE: "false" HBASE_SSL_ENABLED: "false" diff --git a/base/kustomization.yaml b/base/kustomization.yaml index 43dd57c..5121177 100644 --- a/base/kustomization.yaml +++ b/base/kustomization.yaml @@ -41,7 +41,7 @@ configMapGenerator: disableNameSuffixHash: true - name: global-files # Add files used by most/all processes into a global configuration configmap - # accessible to all processes. The environment-configmap defines env varibles used by + # accessible to all processes. The environment-configmap defines env variables used by # all processes and pods. This configmap loads files used by each process. files: - jmxexporter.yaml diff --git a/base/rbac.yaml b/base/rbac.yaml index 29e9c89..4944be4 100644 --- a/base/rbac.yaml +++ b/base/rbac.yaml @@ -91,7 +91,8 @@ metadata: # Matches the ServiceAccount above referenced by statefulsets # in their serviceName. # See https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id -# This is required for statefulsets. See https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#limitations +# This is required for statefulsets. 
+# See https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#limitations apiVersion: v1 kind: Service metadata: diff --git a/dev-support/jenkins/jenkins_precommit_github_yetus.sh b/dev-support/jenkins/jenkins_precommit_github_yetus.sh index d3c618f..54890de 100755 --- a/dev-support/jenkins/jenkins_precommit_github_yetus.sh +++ b/dev-support/jenkins/jenkins_precommit_github_yetus.sh @@ -67,7 +67,6 @@ DOCKER_ARGS+=('--platform' 'linux/amd64') DOCKER_ARGS+=('--quiet') DOCKER_ARGS+=('--rm') DOCKER_ARGS+=('--workdir' "${MOUNT_DIR}") -DOCKER_ARGS+=('--user' "$(id -u):$(id -g)") # path to test-patch in the container image TESTPATCHBIN="/usr/bin/test-patch" @@ -120,7 +119,7 @@ YETUS_ARGS+=("--build-url-artifacts=${BUILD_URL_ARTIFACTS}") YETUS_ARGS+=("--user-plugins=${MOUNT_DIR}/${SOURCEDIR_REL}/dev-support/jenkins/yetus_plugins.d") # plugins to enable YETUS_ARGS+=("--plugins=${PLUGINS}") -YETUS_ARGS+=("--tests-filter=test4tests") +YETUS_ARGS+=("--tests-filter=test4tests,shelldocs") # help keep the ASF boxes clean YETUS_ARGS+=("--sentinel") YETUS_ARGS+=("--github-token=${GITHUB_TOKEN}") diff --git a/dockerfiles/kuttl/README.md b/dockerfiles/kuttl/README.md index 3da7e63..7507077 100644 --- a/dockerfiles/kuttl/README.md +++ b/dockerfiles/kuttl/README.md @@ -65,4 +65,4 @@ docker container run --rm -it ${USER}/hbase/kustomize/kuttl:latest --help ``` You can use this image to run the unit and integration tests. See -[tests/README.md](../../tests/README.md) for deatils. +[tests/README.md](../../tests/README.md) for details. diff --git a/overlays/hdfs/dn-statefulset.yaml b/overlays/hdfs/dn-statefulset.yaml index b03640d..ef5c7ee 100644 --- a/overlays/hdfs/dn-statefulset.yaml +++ b/overlays/hdfs/dn-statefulset.yaml @@ -41,26 +41,27 @@ spec: - image: hadoop name: datanode command: - - /bin/bash - - -c - - |- - # Shell context so we can pull in the environment variables set in the container and - # via the env and envFrom. 
- # See https://stackoverflow.com/questions/57885828/netty-cannot-access-class-jdk-internal-misc-unsafe - HADOOP_LOGFILE="hdfs-${HOSTNAME}.log" \ - HDFS_DATANODE_OPTS=" \ - -XX:MaxRAMPercentage=${JVM_HEAP_PERCENTAGE_OF_RESOURCE_LIMIT} \ - -XX:InitialRAMPercentage=${JVM_HEAP_PERCENTAGE_OF_RESOURCE_LIMIT} \ - -Djava.security.properties=/tmp/scratch/java.security \ - -javaagent:${JMX_PROMETHEUS_JAR}=8000:/tmp/scratch/jmxexporter.yaml \ - -Djava.library.path=${HADOOP_HOME}/lib/native \ - --add-opens java.base/jdk.internal.misc=ALL-UNNAMED \ - -Dio.netty.tryReflectionSetAccessible=true \ - -Xlog:gc:/var/log/hadoop/gc.log:time,uptime:filecount=10,filesize=100M" \ - hdfs datanode + - /bin/bash + - -c + - |- + # Shell context so we can pull in the environment variables set in the container and + # via the env and envFrom. + # See https://stackoverflow.com/questions/57885828/netty-cannot-access-class-jdk-internal-misc-unsafe + HADOOP_LOGFILE="hdfs-${HOSTNAME}.log" \ + HDFS_DATANODE_OPTS=" \ + -XX:MaxRAMPercentage=${JVM_HEAP_PERCENTAGE_OF_RESOURCE_LIMIT} \ + -XX:InitialRAMPercentage=${JVM_HEAP_PERCENTAGE_OF_RESOURCE_LIMIT} \ + -Djava.security.properties=/tmp/scratch/java.security \ + -javaagent:${JMX_PROMETHEUS_JAR}=8000:/tmp/scratch/jmxexporter.yaml \ + -Djava.library.path=${HADOOP_HOME}/lib/native \ + --add-opens java.base/jdk.internal.misc=ALL-UNNAMED \ + -Dio.netty.tryReflectionSetAccessible=true \ + -Xlog:gc:/var/log/hadoop/gc.log:time,uptime:filecount=10,filesize=100M" \ + hdfs datanode # For now, just fetch local /jmx - # Says kubelet only exposes failures, not success: https://stackoverflow.com/questions/34455040/kubernetes-liveness-probe-logging - # Do better. Check this DN successfully registered w/ NN. TODO. + # Doc says kubelet only exposes failures, not success + # https://stackoverflow.com/questions/34455040/kubernetes-liveness-probe-logging + # TODO: Do better. Check this DN successfully registered w/ NN. 
livenessProbe: httpGet: path: /jmx?qry=java.lang:type=OperatingSystem diff --git a/overlays/hdfs/nn-statefulset.yaml b/overlays/hdfs/nn-statefulset.yaml index 789e464..8e94c71 100644 --- a/overlays/hdfs/nn-statefulset.yaml +++ b/overlays/hdfs/nn-statefulset.yaml @@ -55,25 +55,26 @@ spec: name: namenode imagePullPolicy: IfNotPresent command: - - /bin/bash - - -c - - |- - # Shell context so we can pull in the environment variables set in the container and - # via the env and envFrom. - # See https://stackoverflow.com/questions/57885828/netty-cannot-access-class-jdk-internal-misc-unsafe - HADOOP_LOGFILE="hdfs-${HOSTNAME}.log" \ - HDFS_NAMENODE_OPTS=" \ - -XX:MaxRAMPercentage=${JVM_HEAP_PERCENTAGE_OF_RESOURCE_LIMIT} \ - -XX:InitialRAMPercentage=${JVM_HEAP_PERCENTAGE_OF_RESOURCE_LIMIT} \ - -Djava.security.properties=/tmp/scratch/java.security \ - -javaagent:${JMX_PROMETHEUS_JAR}=8000:/tmp/scratch/jmxexporter.yaml \ - -Djava.library.path=${HADOOP_HOME}/lib/native \ - --add-opens java.base/jdk.internal.misc=ALL-UNNAMED \ - -Dio.netty.tryReflectionSetAccessible=true \ - -Xlog:gc:/var/log/hadoop/gc.log:time,uptime:filecount=10,filesize=100M" \ - hdfs namenode + - /bin/bash + - -c + - |- + # Shell context so we can pull in the environment variables set in the container and + # via the env and envFrom. 
+ # See https://stackoverflow.com/questions/57885828/netty-cannot-access-class-jdk-internal-misc-unsafe + HADOOP_LOGFILE="hdfs-${HOSTNAME}.log" \ + HDFS_NAMENODE_OPTS=" \ + -XX:MaxRAMPercentage=${JVM_HEAP_PERCENTAGE_OF_RESOURCE_LIMIT} \ + -XX:InitialRAMPercentage=${JVM_HEAP_PERCENTAGE_OF_RESOURCE_LIMIT} \ + -Djava.security.properties=/tmp/scratch/java.security \ + -javaagent:${JMX_PROMETHEUS_JAR}=8000:/tmp/scratch/jmxexporter.yaml \ + -Djava.library.path=${HADOOP_HOME}/lib/native \ + --add-opens java.base/jdk.internal.misc=ALL-UNNAMED \ + -Dio.netty.tryReflectionSetAccessible=true \ + -Xlog:gc:/var/log/hadoop/gc.log:time,uptime:filecount=10,filesize=100M" \ + hdfs namenode # For now, just fetch local /jmx - # Says kubelet only exposes failures, not success: https://stackoverflow.com/questions/34455040/kubernetes-liveness-probe-logging + # Doc says kubelet only exposes failures, not success: + # https://stackoverflow.com/questions/34455040/kubernetes-liveness-probe-logging livenessProbe: httpGet: path: /jmx?qry=java.lang:type=OperatingSystem @@ -310,6 +311,7 @@ spec: - emptyDir: {} name: scratch - secret: + # pragma: allowlist nextline secret secretName: zookeeper-credentials defaultMode: 400 optional: true diff --git a/tests/README.md b/tests/README.md index a74ea0c..295a5da 100644 --- a/tests/README.md +++ b/tests/README.md @@ -108,8 +108,8 @@ $ docker container run \ ### Prepare a KinD cluster Ask KinD to create a cluster (and docker network), and export the configuration oriented as from -inside the cluster. Start by creating a kind-config.yaml and configuring it for muliple nodes. -See https://kind.sigs.k8s.io/docs/user/quick-start/#configuring-your-kind-cluster +inside the cluster. Start by creating a kind-config.yaml and configuring it for multiple nodes. +See <https://kind.sigs.k8s.io/docs/user/quick-start/#configuring-your-kind-cluster> ```shell $ kind create cluster --config kind-config.yaml @@ -139,9 +139,9 @@ zookeeper:... Pull those images locally. ```shell -$ docker image pull hadoop:... -$ docker image pull hbase:...
-$ docker image pull zookeeper:... +docker image pull hadoop:... +docker image pull hbase:... +docker image pull zookeeper:... ``` Now make sure kuttl is using a docker volume for the containerd directory on each container, and @@ -163,7 +163,8 @@ kindContainers: When you run `kuttl` with this config, you'll see that it has mounted a volume for each container. It'll take a while, but `kuttl` will report its progress copying these container images. -``` +```sh +$ kuttl ... == RUN kuttl ... harness.go:202: node mount point /var/lib/docker/volumes/kind-0/_data diff --git a/tests/integration/test_base/kustomization.yaml b/tests/integration/test_base/kustomization.yaml index 387cf31..48abbee 100644 --- a/tests/integration/test_base/kustomization.yaml +++ b/tests/integration/test_base/kustomization.yaml @@ -18,7 +18,8 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization commonLabels: - # Must repeat common labels and images in each overlay; can't inherit to keep each overlay independent + # Must repeat common labels and images in each overlay; can't inherit to keep each overlay + # independent # https://github.com/kubernetes-sigs/kustomize/issues/915 # This label is used to open up calico network acls app: hadoop