diff --git a/.github/dependabot.yml b/.github/dependabot.yml index b453b546..1a0ab989 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -7,6 +7,20 @@ updates: interval: daily time: "09:00" timezone: "Asia/Taipei" + groups: + minor-and-patch: + applies-to: version-updates + update-types: + - "minor" + - "patch" + sec-minor-and-patch: + applies-to: security-updates + update-types: + - "minor" + - "patch" + ignore: + - dependency-name: "*" + update-types: ["version-update:semver-major"] - package-ecosystem: gomod directory: / diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index dc40fb99..d8c941a7 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -23,7 +23,7 @@ jobs: persist-credentials: false - name: "Run analysis" - uses: ossf/scorecard-action@f49aabe0b5af0936a0987cfb85d86b75731b0186 # v2.4.1 + uses: ossf/scorecard-action@05b42c624433fc40578a4040d5cf5e36ddca8cde # v2.4.2 with: results_file: results.sarif results_format: sarif @@ -37,6 +37,6 @@ jobs: retention-days: 5 - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@60168efe1c415ce0f5521ea06d5c2062adbeed1b # v3.28.17 + uses: github/codeql-action/upload-sarif@fca7ace96b7d713c7035871441bd52efbe39e27e # v3.28.19 with: sarif_file: results.sarif diff --git a/README.md b/README.md index e806816c..1aafc3db 100644 --- a/README.md +++ b/README.md @@ -1,68 +1,32 @@
- Logo + Logo

[![License][License-Image]][License-Url] [![made-with-Go][Go-Made-Image]][Go-Made-Url] [![Go][Go-Report-Image]][Go-Report-Url] [![GitHub issues][Github-Issue-Image]][Github-Issue-Url] [![GitHub last commit (branch)][GitHub-Last-Commit-Image]][GitHub-Last-Commit-Url] -

- -

▎To Start Developing

- -
- -0). Get the build environment ready - -We would need an `x86_64` / `amd64` based machine to build the rpm package. - -We would need to have `golang` ready on the machine. - -We would need rpm build tools. - -For Fedora Linux based OS: +🚀 [API] | ⛩️ [Architecture] | 👷‍♂️ [Developing] | 🔬 [Troubleshooting] -```bash -sudo dnf install -y rpmdevtools rpmlint -``` +

-
+## ▎Overview -1). Build rpm +The cube-cos api is a central communication mechanism in the CubeCOS written in [Go]. Each node has its own cube-cos api and discover peer nodes by [MDNS] for cross-node communication. -```bash -task rpm:build -``` +Additionally, there’re 14+ apis in the CubeCOS, the cube-cos api is just one of apis which responsible for the partial native features of cube-cos currently, but it will cover more and more features in the incoming milestones.
-2). Send the built rpm to a running CubeCOS - -``` -scp @: -``` +Logo
- -3). Install the rpm and start the service - -```bash -systemctl stop cube-cos-api -dnf -y remove cube-cos-api -dnf -y install "" -hex_config bootstrap api -``` -
- ---- -
-

▎License

- +## ▎License Copyright (c) 2025 [Bigstack co., ltd](https://bigstack.co/) @@ -79,6 +43,12 @@ See the License for the specific language governing permissions and limitations under the License. +[API]: https://github.com/bigstack-oss/cube-cos-api/tree/develop/docs/api +[Architecture]: https://github.com/bigstack-oss/cube-cos-api/tree/develop/docs/architecture +[Developing]: https://github.com/bigstack-oss/cube-cos-api/tree/develop/docs/developing +[Troubleshooting]: https://github.com/bigstack-oss/cube-cos-api/tree/develop/docs/troubleshooting +[Go]: https://go.dev/ +[MDNS]: https://en.wikipedia.org/wiki/Multicast_DNS [License-Url]: https://www.apache.org/licenses/LICENSE-2.0 [License-Image]: https://img.shields.io/badge/License-Apache2-blue.svg [Go-Report-Url]: https://goreportcard.com/report/github.com/bigstack-oss/cube-cos-api diff --git a/assets/images/architecture/base.png b/assets/images/architecture/base.png new file mode 100644 index 00000000..7a5a4280 Binary files /dev/null and b/assets/images/architecture/base.png differ diff --git a/assets/images/architecture/component-feature-impact.png b/assets/images/architecture/component-feature-impact.png new file mode 100644 index 00000000..5514885b Binary files /dev/null and b/assets/images/architecture/component-feature-impact.png differ diff --git a/assets/images/architecture/data-preparation.png b/assets/images/architecture/data-preparation.png new file mode 100644 index 00000000..c9399ed7 Binary files /dev/null and b/assets/images/architecture/data-preparation.png differ diff --git a/assets/images/architecture/data-request.png b/assets/images/architecture/data-request.png new file mode 100644 index 00000000..9757310c Binary files /dev/null and b/assets/images/architecture/data-request.png differ diff --git a/assets/images/architecture/mdns-broadcast.png b/assets/images/architecture/mdns-broadcast.png new file mode 100644 index 00000000..11395406 Binary files /dev/null and 
b/assets/images/architecture/mdns-broadcast.png differ diff --git a/assets/images/architecture/mdns-query.png b/assets/images/architecture/mdns-query.png new file mode 100644 index 00000000..708327a3 Binary files /dev/null and b/assets/images/architecture/mdns-query.png differ diff --git a/assets/images/architecture/mdns-receive.png b/assets/images/architecture/mdns-receive.png new file mode 100644 index 00000000..f0ebf00c Binary files /dev/null and b/assets/images/architecture/mdns-receive.png differ diff --git a/assets/images/architecture/mdns-response.png b/assets/images/architecture/mdns-response.png new file mode 100644 index 00000000..58f6ad55 Binary files /dev/null and b/assets/images/architecture/mdns-response.png differ diff --git a/assets/images/architecture/node-access-delegate.png b/assets/images/architecture/node-access-delegate.png new file mode 100644 index 00000000..950daebd Binary files /dev/null and b/assets/images/architecture/node-access-delegate.png differ diff --git a/assets/images/architecture/node-components.png b/assets/images/architecture/node-components.png new file mode 100644 index 00000000..7c73965d Binary files /dev/null and b/assets/images/architecture/node-components.png differ diff --git a/assets/images/architecture/node-view.png b/assets/images/architecture/node-view.png new file mode 100644 index 00000000..5b588d8b Binary files /dev/null and b/assets/images/architecture/node-view.png differ diff --git a/assets/images/architecture/operation-request.png b/assets/images/architecture/operation-request.png new file mode 100644 index 00000000..6a2766ba Binary files /dev/null and b/assets/images/architecture/operation-request.png differ diff --git a/assets/images/bigstack.png b/assets/images/panel/api.png similarity index 100% rename from assets/images/bigstack.png rename to assets/images/panel/api.png diff --git a/assets/videos/api/interfaces.mov b/assets/videos/api/interfaces.mov new file mode 100644 index 00000000..ef774a7e Binary 
files /dev/null and b/assets/videos/api/interfaces.mov differ diff --git a/configs/cube-cos-api.yaml.template b/configs/cube-cos-api.yaml.template index 9d7be9de..c03b80ba 100644 --- a/configs/cube-cos-api.yaml.template +++ b/configs/cube-cos-api.yaml.template @@ -54,7 +54,8 @@ spec: database: cube-cos replicaSet: cube-cos-rs auth: - enabled: true + enable: true + source: admin username: admin password: admin influxdb: diff --git a/docs/api/README.md b/docs/api/README.md new file mode 100644 index 00000000..4b116da6 --- /dev/null +++ b/docs/api/README.md @@ -0,0 +1,39 @@ +## ▎Overview + +The following guide is for anyone who wants to see the comprehensive list of API usage. + +Generally, there are two ways to find it + +- Access on CubeCOS + +- Copy JSON to Swagger online editor + +You should be able to see the doc like the video below via either way + +https://github.com/bigstack-oss/cube-cos-api/blob/develop/assets/videos/api/interfaces.mov + +
+ +## ▎Access on CubeCOS + +1). Log in your CubeCOS by UI + +
+ +2). Paste the URL below in the browser + +- convention: https://{CubeCOS's Virtual IP}/api/v1/datacenters/{data center name}/apidocs/index.html + +- for example: https://10.32.45.10/api/v1/datacenters/demo-site/apidocs/index.html + +
+ +## ▎Swagger Online Editor + +1). Copy the JSON content from https://github.com/bigstack-oss/cube-cos-api/blob/develop/api/docs.json + +
+ +2). Paste it to the Swagger's online editor + +https://editor.swagger.io/ diff --git a/docs/architecture/README.md b/docs/architecture/README.md new file mode 100644 index 00000000..8f4aa28d --- /dev/null +++ b/docs/architecture/README.md @@ -0,0 +1,268 @@ +## ▎Architecture - Base + +Starting from cube-cos 3.0.0, the cube-cos api replaces the LMI (legacy UI + API stack) as the central communication mechanism in cube-cos. each node has its own cube-cos api and discovers peer nodes by MDNS for cross-node communication + +
+ +Logo + +
+
+ +Additionally, there’re `14+ apis in the cube-cos`, the `cube-cos api` is just one of apis which responsible for the `partial native features` of CubeCOS currently and will cover more and more features in the incoming milestones. + +```bash +(the apis below not includes the k3s api, rancher api, ceph api, and so on...) + +$ systemctl --type=service | grep api + cube-cos-api.service loaded active running CubeCosApi + cyborg-api.service loaded active running OpenStack Acceleration API service + designate-api.service loaded active running OpenStack Designate DNSaaS API + masakari-api.service loaded active running OpenStack Masakari Api service + octavia-api.service loaded active running OpenStack Octavia API service + openstack-cinder-api.service loaded active running OpenStack Cinder API Server + openstack-glance-api.service loaded active running OpenStack Image Service (code-named Glance) API server + openstack-heat-api-cfn.service loaded active running Openstack Heat CFN-compatible API Service + openstack-heat-api.service loaded active running OpenStack Heat API Service + openstack-manila-api.service loaded active running OpenStack Manila API Server + openstack-nova-api.service loaded active running OpenStack Nova API Server + openstack-senlin-api.service loaded active running OpenStack Senlin API Server + openstack-watcher-api.service loaded active running OpenStack Watcher API service + skyline-apiserver.service loaded active running Skyline APIServer +``` + +
+
+ +## ▎Architecture - Service Discovery + +cube-cos api instances discover each other through the MDNS protocol (`UDP 5353 port`) with a `data center identity`: `{data center name}-{virtual ip}-{first 8 chars of keycloak OIDC secret}` + +for example: `control-10.32.45.10-g2u1bojz`. the api broadcasts its node details to all peer nodes `every 20s`. + +
+ +Logo + +
+
+ +the payload in the MDNS broadcast is like, for example: + +```bash +{ + "metadata": { + "broker": "http", + "dataCenter": "control", + "hostname": "cube451", + "ip": "10.32.45.1", + "isGpuEnabled": "false", + "nodeID": "fd3b8e3f", + "protocol": "http", + "registry": "mdns", + "role": "control-converged", + "serialNumber": "1MXXZH2", + "server": "http" + }, + "service": "control-10.32.45.10-g2u1bojz", + "version": "latest", + "endpoints": null +} +``` + +
+ +all apis will receive node details through the flow above by identifying the data center identity, then resync the data in the pre-stored data place. + +Logo + +
+
+ +the TTL of the node details in each node is 60s, when the record is expired, the cube-cos api will ask “who owned the service for {data center identity}” via MDNS broadcast to resync the node list again (⚠️ /etc/settings.cluster.json will also be involved in the process of node sync to know who should be online +) + +Logo + +
+
+ +Logo + +
+
+ +for request communication or delegation, each cube-cos api will know whether the request should be operated locally or delegate to other peer nodes (internal node communication also requires token auth). + +Logo + +
+
+
+
+ +## ▎Architecture - Inside A Node + +from the perspective of single node, there’re 6 layers co-working with the api to handle different types of request. + +
+ +Logo + +
+
+ +the R&R, components, and purpose from the each layer is like + +
+ +Logo + +
+
+ +- auth layer + - check, verify, allow or block the request +- transfer layer + - do protocol termination and port forward +- core layer + - the core services of cube-cos, provides everything we need for HCI operation +- api layer + - handle the request we allow to operate from UI and API +- storage layer + - persist the pending request, metrics, health history, and event data +- filesystem layer + - persist the core settings and artifacts of CubeCOS + +
+
+ +## ▎Data Preparation Flow (Request Acceleration) + +cube-cos api has an internal data place to prestore the data periodically(or by events from filesystem), so that the incoming request can leverage it immediately. + +
+ +Logo + +
+
+ +the `periodical prestored data` includes: + +- health history + +
+ +the `event driven prestored data` includes: + +- tuning (watching change from `/etc/settings.txt`) +- alert setting (watching event from `/etc/settings.txt`) +- trigger (watching change from `/etc/settings.txt`) +- support file (watching change from `/var/support/`) + +
+ +the `periodical and event driven prestored mixed data` includes: + +- node details (watching change from `node mdns` and `/etc/update`) +- license (watching change from `/etc/update` and `node mdns`) + +
+
+ +## ▎Data Request Flow + +when the UI sends out a GET request to fetch data, the first service it actually meets is the HAProxy rather than the cube-cos api. before responding to the UI, there are a few components which get involved, as listed below + +
+ +Logo + +
+
+ +- keycloak + - Auth free: data center listing api, grafana api, and opensearch api + - UI: auth by SAML + - Pure API: auth by OIDC or internal node token + +
+ +- haproxy + - forward the request between internet and intranet by different ports and paths + - TLS/SSL termination (only do in plain text between api caller and HAProxy) + +
+ +- api + - fetch data from prestored area, influxdb or other peer nodes + - sync the pending status / value to current data + - process data by specific business logic + - filter or paginate data, then do one time response or continuous streaming + +
+
+ +## ▎Operation Request Flow + +when UI send out the POST, PATCH, PUT, or DELETE to operate the particular resource, the actual flow in the backend is like + +
+ +Logo + +
+
+ +- keycloak + - same as the Data Request Flow + +
+ +- haproxy + - same as the Data Request Flow + +
+ +- api + - check whether cos is ready to operate or the same operation is already in progress + - check whether the resource is valid to operate + - check whether the incoming new value is valid to operate + - check whether it’s local apply or should delegate to remote + - upsert the pending record in the mongodb + - if it’s local apply, then add a task in the internal queue. + - if it should be delegated to a peer node, then call the same API on the peer node. + - api returns the RC 202 to the caller first. + - background operator (inside the api) will fetch the task via queue, and perform the requested operation. + - background operator reports the status to one of the control nodes whether the result is success or failure. + - control api updates or deletes the pending record. + +
+ +- ⚠️ notice for API restart when task is not finished yet + - if the operation is driven by hex tool, then the process won’t be interrupted. + - during a restart, all pending records which are associated to its node will be purged. + +
+
+ +## ▎Relationship of Service Impact + +when you see an unexpected symptom from the UI or API, sometimes it might not be a bug. maybe it’s because cos services are under repair or can’t function well due to resource overload. + +
+ +Logo + +
+
+ +X axis: the features COS provides through UI and API + +Y axis: the core services in the COS + +for example: + +- when Ceph RGW is not working, then it will impact the integrity of health feature +- when InfluxDB is not working, the event, health, and metric features will fail to return data diff --git a/docs/developing/README.md b/docs/developing/README.md new file mode 100644 index 00000000..4d559fd0 --- /dev/null +++ b/docs/developing/README.md @@ -0,0 +1,76 @@ +## ▎To Start Developing + +The following guide is for anyone to write code which directly accesses the CubeCOS API. + +Generally, there are two ways of developing the CubeCOS API which help you iterate quickly + +- RPM: A comprehensive way to develop with CubeCOS + +- Binary replacement: A faster way to develop with CubeCOS (⚠️ the binary will be reset when CubeCOS gets restarted) + +
+ +## ▎Get The Environment Ready + +- Go >= 1.24.0 + +- Taskfile + +- rpmdevtools + +- rpmlint + +
+ +## ▎RPM + +1). Go to cube-cos-api dir + +
+ +2). Execute the task command + +```bash +task rpm:build +``` + +
+ +3). Send the built rpm to a running CubeCOS + +``` +scp @: +``` + +
+ +4). Log in to your CubeCOS + +```bash +systemctl stop cube-cos-api +dnf -y remove cube-cos-api +dnf -y install "" +hex_config bootstrap api +``` + +
+ +## ▎Binary Replacement + +1). Go to cube-cos-api dir + +
+ +2). Replace your CubeCOS IP in the Taskfile.yaml below + +```sh + devCosIp: "" +``` + +
+ +3). Execute the task command + +```bash +task deployRemoteDevApi +``` diff --git a/docs/troubleshooting/README.md b/docs/troubleshooting/README.md new file mode 100644 index 00000000..c0b6ea7c --- /dev/null +++ b/docs/troubleshooting/README.md @@ -0,0 +1 @@ +## ▎ \ No newline at end of file diff --git a/internal/apis/v1/handlers/nodes/helper.go b/internal/apis/v1/handlers/nodes/helper.go index c30031d6..cd564ec3 100644 --- a/internal/apis/v1/handlers/nodes/helper.go +++ b/internal/apis/v1/handlers/nodes/helper.go @@ -4,8 +4,6 @@ import ( "fmt" "sort" - "github.com/bigstack-oss/bigstack-dependency-go/pkg/http" - "github.com/bigstack-oss/cube-cos-api/internal/apis/v1/bodies" "github.com/bigstack-oss/cube-cos-api/internal/apis/v1/queries" "github.com/bigstack-oss/cube-cos-api/internal/cubecos" "github.com/bigstack-oss/cube-cos-api/internal/definition/v1/nodes" @@ -93,24 +91,5 @@ func (h *helper) getNode() (*nodes.Node, error) { return nil, fmt.Errorf("node %s is down", node.Hostname) } - return h.askPeerNode(node) -} - -func (h *helper) askPeerNode(node *nodes.Node) (*nodes.Node, error) { - http := http.GetGlobalHelper() - resp, err := http.R().SetResult(&bodies.Node{}).SetHeaders(nodes.GetSecretHeaders()).Get(node.GetNodeUrl()) - if err != nil { - return nil, err - } - - if !resp.IsError() { - return &resp.Result().(*bodies.Node).Data, nil - } - - return nil, fmt.Errorf( - "nodes(%s): has err resp for node details %s: %s", - h.reqId, - node.Hostname, - string(resp.Body()), - ) + return node, nil } diff --git a/internal/apis/v1/handlers/tunings/parse.go b/internal/apis/v1/handlers/tunings/parse.go index b9c005b8..5635cfd1 100644 --- a/internal/apis/v1/handlers/tunings/parse.go +++ b/internal/apis/v1/handlers/tunings/parse.go @@ -71,14 +71,14 @@ func (h *helper) parseTuningReset() error { } h.tuning.Name = name + h.tuning.Value = spec.Limitation.Default + h.tuning.Enabled = true + h.tuning.SetHosts(h.reset.Hosts) if !h.isTuningModified() { return errors.New("can't 
reset unmodified tuning") } - h.tuning.Value = spec.Limitation.Default - h.tuning.Enabled = true h.tuning.IsModified = false - h.tuning.SetHosts(h.reset.Hosts) h.tuning.SetResetting() return nil }