diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index b453b546..1a0ab989 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -7,6 +7,20 @@ updates:
interval: daily
time: "09:00"
timezone: "Asia/Taipei"
+ groups:
+ minor-and-patch:
+ applies-to: version-updates
+ update-types:
+ - "minor"
+ - "patch"
+ sec-minor-and-patch:
+ applies-to: security-updates
+ update-types:
+ - "minor"
+ - "patch"
+ ignore:
+ - dependency-name: "*"
+ update-types: ["version-update:semver-major"]
- package-ecosystem: gomod
directory: /
diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml
index dc40fb99..d8c941a7 100644
--- a/.github/workflows/scorecard.yml
+++ b/.github/workflows/scorecard.yml
@@ -23,7 +23,7 @@ jobs:
persist-credentials: false
- name: "Run analysis"
- uses: ossf/scorecard-action@f49aabe0b5af0936a0987cfb85d86b75731b0186 # v2.4.1
+ uses: ossf/scorecard-action@05b42c624433fc40578a4040d5cf5e36ddca8cde # v2.4.2
with:
results_file: results.sarif
results_format: sarif
@@ -37,6 +37,6 @@ jobs:
retention-days: 5
- name: "Upload to code-scanning"
- uses: github/codeql-action/upload-sarif@60168efe1c415ce0f5521ea06d5c2062adbeed1b # v3.28.17
+ uses: github/codeql-action/upload-sarif@fca7ace96b7d713c7035871441bd52efbe39e27e # v3.28.19
with:
sarif_file: results.sarif
diff --git a/README.md b/README.md
index e806816c..1aafc3db 100644
--- a/README.md
+++ b/README.md
@@ -1,68 +1,32 @@
-

+
[![License][License-Image]][License-Url] [![made-with-Go][Go-Made-Image]][Go-Made-Url] [![Go][Go-Report-Image]][Go-Report-Url] [![GitHub issues][Github-Issue-Image]][Github-Issue-Url] [![GitHub last commit (branch)][GitHub-Last-Commit-Image]][GitHub-Last-Commit-Url]
-
-
-▎To Start Developing
-
-
-
-0). Get the build environment ready
-
-We would need an `x86_64` / `amd64` based machine to build the rpm package.
-
-We would need to have `golang` ready on the machine.
-
-We would need rpm build tools.
-
-For Fedora Linux based OS:
+🚀 [API] | ⛩️ [Architecture] | 👷♂️ [Developing] | 🔬 [Troubleshooting]
-```bash
-sudo dnf install -y rpmdevtools rpmlint
-```
+
-
+## ▎Overview
-1). Build rpm
+The cube-cos api is a central communication mechanism in the CubeCOS written in [Go]. Each node has its own cube-cos api and discovers peer nodes by [MDNS] for cross-node communication.
-```bash
-task rpm:build
-```
+Additionally, there are 14+ apis in the CubeCOS; the cube-cos api is just one of them and is currently responsible for part of the native features of cube-cos, but it will cover more and more features in the upcoming milestones.
-2). Send the built rpm to a running CubeCOS
-
-```
-scp @:
-```
+
-
-3). Install the rpm and start the service
-
-```bash
-systemctl stop cube-cos-api
-dnf -y remove cube-cos-api
-dnf -y install ""
-hex_config bootstrap api
-```
-
-
----
-
-▎License
-
+## ▎License
Copyright (c) 2025 [Bigstack co., ltd](https://bigstack.co/)
@@ -79,6 +43,12 @@ See the License for the specific language governing permissions and
limitations under the License.
+[API]: https://github.com/bigstack-oss/cube-cos-api/tree/develop/docs/api
+[Architecture]: https://github.com/bigstack-oss/cube-cos-api/tree/develop/docs/architecture
+[Developing]: https://github.com/bigstack-oss/cube-cos-api/tree/develop/docs/developing
+[Troubleshooting]: https://github.com/bigstack-oss/cube-cos-api/tree/develop/docs/troubleshooting
+[Go]: https://go.dev/
+[MDNS]: https://en.wikipedia.org/wiki/Multicast_DNS
[License-Url]: https://www.apache.org/licenses/LICENSE-2.0
[License-Image]: https://img.shields.io/badge/License-Apache2-blue.svg
[Go-Report-Url]: https://goreportcard.com/report/github.com/bigstack-oss/cube-cos-api
diff --git a/assets/images/architecture/base.png b/assets/images/architecture/base.png
new file mode 100644
index 00000000..7a5a4280
Binary files /dev/null and b/assets/images/architecture/base.png differ
diff --git a/assets/images/architecture/component-feature-impact.png b/assets/images/architecture/component-feature-impact.png
new file mode 100644
index 00000000..5514885b
Binary files /dev/null and b/assets/images/architecture/component-feature-impact.png differ
diff --git a/assets/images/architecture/data-preparation.png b/assets/images/architecture/data-preparation.png
new file mode 100644
index 00000000..c9399ed7
Binary files /dev/null and b/assets/images/architecture/data-preparation.png differ
diff --git a/assets/images/architecture/data-request.png b/assets/images/architecture/data-request.png
new file mode 100644
index 00000000..9757310c
Binary files /dev/null and b/assets/images/architecture/data-request.png differ
diff --git a/assets/images/architecture/mdns-broadcast.png b/assets/images/architecture/mdns-broadcast.png
new file mode 100644
index 00000000..11395406
Binary files /dev/null and b/assets/images/architecture/mdns-broadcast.png differ
diff --git a/assets/images/architecture/mdns-query.png b/assets/images/architecture/mdns-query.png
new file mode 100644
index 00000000..708327a3
Binary files /dev/null and b/assets/images/architecture/mdns-query.png differ
diff --git a/assets/images/architecture/mdns-receive.png b/assets/images/architecture/mdns-receive.png
new file mode 100644
index 00000000..f0ebf00c
Binary files /dev/null and b/assets/images/architecture/mdns-receive.png differ
diff --git a/assets/images/architecture/mdns-response.png b/assets/images/architecture/mdns-response.png
new file mode 100644
index 00000000..58f6ad55
Binary files /dev/null and b/assets/images/architecture/mdns-response.png differ
diff --git a/assets/images/architecture/node-access-delegate.png b/assets/images/architecture/node-access-delegate.png
new file mode 100644
index 00000000..950daebd
Binary files /dev/null and b/assets/images/architecture/node-access-delegate.png differ
diff --git a/assets/images/architecture/node-components.png b/assets/images/architecture/node-components.png
new file mode 100644
index 00000000..7c73965d
Binary files /dev/null and b/assets/images/architecture/node-components.png differ
diff --git a/assets/images/architecture/node-view.png b/assets/images/architecture/node-view.png
new file mode 100644
index 00000000..5b588d8b
Binary files /dev/null and b/assets/images/architecture/node-view.png differ
diff --git a/assets/images/architecture/operation-request.png b/assets/images/architecture/operation-request.png
new file mode 100644
index 00000000..6a2766ba
Binary files /dev/null and b/assets/images/architecture/operation-request.png differ
diff --git a/assets/images/bigstack.png b/assets/images/panel/api.png
similarity index 100%
rename from assets/images/bigstack.png
rename to assets/images/panel/api.png
diff --git a/assets/videos/api/interfaces.mov b/assets/videos/api/interfaces.mov
new file mode 100644
index 00000000..ef774a7e
Binary files /dev/null and b/assets/videos/api/interfaces.mov differ
diff --git a/configs/cube-cos-api.yaml.template b/configs/cube-cos-api.yaml.template
index 9d7be9de..c03b80ba 100644
--- a/configs/cube-cos-api.yaml.template
+++ b/configs/cube-cos-api.yaml.template
@@ -54,7 +54,8 @@ spec:
database: cube-cos
replicaSet: cube-cos-rs
auth:
- enabled: true
+ enable: true
+ source: admin
username: admin
password: admin
influxdb:
diff --git a/docs/api/README.md b/docs/api/README.md
new file mode 100644
index 00000000..4b116da6
--- /dev/null
+++ b/docs/api/README.md
@@ -0,0 +1,39 @@
+## ▎Overview
+
+The following guide is for anyone who wants to see the comprehensive list of API usage.
+
+Generally, there are two ways to find it:
+
+- Access on CubeCOS
+
+- Copy JSON to Swagger online editor
+
+You should be able to see the doc like the video below via either way
+
+https://github.com/bigstack-oss/cube-cos-api/blob/develop/assets/videos/api/interfaces.mov
+
+
+
+## ▎Access on CubeCOS
+
+1). Log in to your CubeCOS via the UI
+
+
+
+2). Paste the URL below in the browser
+
+- convention: https://{CubeCOS's Virtual IP}/api/v1/datacenters/{data center name}/apidocs/index.html
+
+- for example: https://10.32.45.10/api/v1/datacenters/demo-site/apidocs/index.html
+
+
+
+## ▎Swagger Online Editor
+
+1). Copy the JSON content from https://github.com/bigstack-oss/cube-cos-api/blob/develop/api/docs.json
+
+
+
+2). Paste it to the Swagger's online editor
+
+https://editor.swagger.io/
diff --git a/docs/architecture/README.md b/docs/architecture/README.md
new file mode 100644
index 00000000..8f4aa28d
--- /dev/null
+++ b/docs/architecture/README.md
@@ -0,0 +1,268 @@
+## ▎Architecture - Base
+
+Starting from cube-cos 3.0.0, the cube-cos api replaces the LMI (legacy UI + API stack) as the central communication mechanism in cube-cos. Each node has its own cube-cos api and discovers peer nodes by MDNS for cross-node communication.
+
+
+
+
+
+
+
+
+Additionally, there are `14+ apis in the cube-cos`; the `cube-cos api` is just one of them and is currently responsible for the `partial native features` of CubeCOS, and it will cover more and more features in the upcoming milestones.
+
+```bash
+(the apis below do not include the k3s api, rancher api, ceph api, and so on...)
+
+$ systemctl --type=service | grep api
+ cube-cos-api.service loaded active running CubeCosApi
+ cyborg-api.service loaded active running OpenStack Acceleration API service
+ designate-api.service loaded active running OpenStack Designate DNSaaS API
+ masakari-api.service loaded active running OpenStack Masakari Api service
+ octavia-api.service loaded active running OpenStack Octavia API service
+ openstack-cinder-api.service loaded active running OpenStack Cinder API Server
+ openstack-glance-api.service loaded active running OpenStack Image Service (code-named Glance) API server
+ openstack-heat-api-cfn.service loaded active running Openstack Heat CFN-compatible API Service
+ openstack-heat-api.service loaded active running OpenStack Heat API Service
+ openstack-manila-api.service loaded active running OpenStack Manila API Server
+ openstack-nova-api.service loaded active running OpenStack Nova API Server
+ openstack-senlin-api.service loaded active running OpenStack Senlin API Server
+ openstack-watcher-api.service loaded active running OpenStack Watcher API service
+ skyline-apiserver.service loaded active running Skyline APIServer
+```
+
+
+
+
+## ▎Architecture - Service Discovery
+
+cube-cos api instances discover each other through the MDNS protocol (`UDP 5353 port`) with a `data center identity`: `{data center name}-{virtual ip}-{first 8 chars of keycloak oidc secret}`
+
+for example: `control-10.32.45.10-g2u1bojz`. the api will broadcast its node details to all peer nodes `every 20s`.
+
+
+
+
+
+
+
+
+the payload in the MDNS broadcast looks like this, for example:
+
+```bash
+{
+ "metadata": {
+ "broker": "http",
+ "dataCenter": "control",
+ "hostname": "cube451",
+ "ip": "10.32.45.1",
+ "isGpuEnabled": "false",
+ "nodeID": "fd3b8e3f",
+ "protocol": "http",
+ "registry": "mdns",
+ "role": "control-converged",
+ "serialNumber": "1MXXZH2",
+ "server": "http"
+ },
+ "service": "control-10.32.45.10-g2u1bojz",
+ "version": "latest",
+ "endpoints": null
+}
+```
+
+
+
+all apis will receive node details through the flow above by identifying the data center identity, then resync the data in the pre-stored data place.
+
+
+
+
+
+
+the TTL of the node details in each node is 60s. when the record expires, the cube-cos api will ask “who owns the service for {data center identity}” via MDNS broadcast to resync the node list again (⚠️ /etc/settings.cluster.json is also involved in the node sync process to know which nodes should be online
+)
+
+
+
+
+
+
+
+
+
+
+
+for request communication or delegation, each cube-cos api knows whether the request should be handled locally or delegated to other peer nodes (internal node communication also requires token auth).
+
+
+
+
+
+
+
+
+## ▎Architecture - Inside A Node
+
+from the perspective of a single node, there are 6 layers working together with the api to handle different types of requests.
+
+
+
+
+
+
+
+
+the R&R, components, and purpose of each layer are as follows
+
+
+
+
+
+
+
+
+- auth layer
+ - check, verify, allow or block the request
+- transfer layer
+ - do protocol termination and port forward
+- core layer
+ - the core services of cube-cos, provides everything we need for HCI operation
+- api layer
+ - handle the request we allow to operate from UI and API
+- storage layer
+ - persist the pending request, metrics, health history, and event data
+- filesystem layer
+ - persist the core settings and artifacts of CubeCOS
+
+
+
+
+## ▎Data Preparation Flow (Request Acceleration)
+
+cube-cos api has an internal data place to prestore data periodically (or on events from the filesystem), so that incoming requests can leverage it immediately.
+
+
+
+
+
+
+
+
+the `periodical prestored data` includes:
+
+- health history
+
+
+
+the `event driven prestored data` includes:
+
+- tuning (watching change from `/etc/settings.txt`)
+- alert setting (watching event from `/etc/settings.txt`)
+- trigger (watching change from `/etc/settings.txt`)
+- support file (watching change from `/var/support/`)
+
+
+
+the `periodical and event driven prestored mixed data` includes:
+
+- node details (watching change from `node mdns` and `/etc/update`)
+- license (watching change from `/etc/update` and `node mdns`)
+
+
+
+
+## ▎Data Request Flow
+
+when the UI sends a GET request to fetch data, the first service it actually reaches is HAProxy rather than the cube-cos api. before the response goes back to the UI, the few components below get involved
+
+
+
+
+
+
+
+
+- keycloak
+ - Auth free: data center listing api, grafana api, and opensearch api
+ - UI: auth by SAML
+ - Pure API: auth by OIDC or internal node token
+
+
+
+- haproxy
+ - forward the request between internet and intranet by different ports and paths
+ - TLS/SSL termination (only do in plain text between api caller and HAProxy)
+
+
+
+- api
+ - fetch data from prestored area, influxdb or other peer nodes
+ - sync the pending status / value to current data
+ - process data by specific business logic
+ - filter or paginate data, then do a one-time response or continuous streaming
+
+
+
+
+## ▎Operation Request Flow
+
+when the UI sends a POST, PATCH, PUT, or DELETE request to operate on a particular resource, the actual flow in the backend is as follows
+
+
+
+
+
+
+
+
+- keycloak
+ - same as the Data Request Flow
+
+
+
+- haproxy
+ - same as the Data Request Flow
+
+
+
+- api
+ - check whether cos is ready to operate and whether the same operation is already in progress
+ - check whether the resource is valid to operate
+ - check whether the incoming new value is valid to operate
+ - check whether it’s local apply or should delegate to remote
+ - upsert the pending record in the mongodb
+ - if it’s local apply, then add a task in the internal queue.
+ - if it should be delegated to peer node, then call the same API to peer node.
+ - api return the RC 202 to the caller first.
+ - the background operator (inside the api) will fetch the task from the queue and execute it.
+ - the background operator reports the status to one of the control nodes, whether the result is success or failure.
+ - the control api updates or deletes the pending record.
+
+
+
+- ⚠️ notice for API restart when task is not finished yet
+ - if the operation is driven by hex tool, then the process won’t be interrupted.
+ - during a restart, all pending records which are associated to its node will be purged.
+
+
+
+
+## ▎Relationship of Service Impact
+
+when you see an unexpected symptom from the UI or API, it might not be a bug. it may be because cos services are under repair or can’t function well due to resource overload.
+
+
+
+
+
+
+
+
+X axis: the features COS provide through UI and API
+
+Y axis: the core services in the COS
+
+for example:
+
+- when Ceph RGW is not working, then it will impact the integrity of health feature
+- when InfluxDB is not working, requests to the event, health, and metric features will fail
diff --git a/docs/developing/README.md b/docs/developing/README.md
new file mode 100644
index 00000000..4d559fd0
--- /dev/null
+++ b/docs/developing/README.md
@@ -0,0 +1,76 @@
+## ▎To Start Developing
+
+The following guide is for anyone to write code which directly accesses the CubeCOS API.
+
+Generally, there are two ways to develop the CubeCOS API, both of which help you iterate quickly
+
+- RPM: A comprehensive way to develop with CubeCOS
+
+- Binary replacement: A faster way to develop with CubeCOS (⚠️ the binary will be reset when CubeCOS gets restarted)
+
+
+
+## ▎Get The Environment Ready
+
+- Go >= 1.24.0
+
+- Taskfile
+
+- rpmdevtools
+
+- rpmlint
+
+
+
+## ▎RPM
+
+1). Go to cube-cos-api dir
+
+
+
+2). Execute the task command
+
+```bash
+task rpm:build
+```
+
+
+
+3). Send the built rpm to a running CubeCOS
+
+```
+scp <built-rpm-file> <user>@<CubeCOS-IP>:<destination-dir>
+```
+
+
+
+4). Log in to your CubeCOS
+
+```bash
+systemctl stop cube-cos-api
+dnf -y remove cube-cos-api
+dnf -y install "<built-rpm-file>"
+hex_config bootstrap api
+```
+
+
+
+## ▎Binary Replacement
+
+1). Go to cube-cos-api dir
+
+
+
+2). Replace your CubeCOS IP in the Taskfile.yaml below
+
+```sh
+ devCosIp: ""
+```
+
+
+
+3). Execute the task command
+
+```bash
+task deployRemoteDevApi
+```
diff --git a/docs/troubleshooting/README.md b/docs/troubleshooting/README.md
new file mode 100644
index 00000000..c0b6ea7c
--- /dev/null
+++ b/docs/troubleshooting/README.md
@@ -0,0 +1 @@
+## ▎
\ No newline at end of file
diff --git a/internal/apis/v1/handlers/nodes/helper.go b/internal/apis/v1/handlers/nodes/helper.go
index c30031d6..cd564ec3 100644
--- a/internal/apis/v1/handlers/nodes/helper.go
+++ b/internal/apis/v1/handlers/nodes/helper.go
@@ -4,8 +4,6 @@ import (
"fmt"
"sort"
- "github.com/bigstack-oss/bigstack-dependency-go/pkg/http"
- "github.com/bigstack-oss/cube-cos-api/internal/apis/v1/bodies"
"github.com/bigstack-oss/cube-cos-api/internal/apis/v1/queries"
"github.com/bigstack-oss/cube-cos-api/internal/cubecos"
"github.com/bigstack-oss/cube-cos-api/internal/definition/v1/nodes"
@@ -93,24 +91,5 @@ func (h *helper) getNode() (*nodes.Node, error) {
return nil, fmt.Errorf("node %s is down", node.Hostname)
}
- return h.askPeerNode(node)
-}
-
-func (h *helper) askPeerNode(node *nodes.Node) (*nodes.Node, error) {
- http := http.GetGlobalHelper()
- resp, err := http.R().SetResult(&bodies.Node{}).SetHeaders(nodes.GetSecretHeaders()).Get(node.GetNodeUrl())
- if err != nil {
- return nil, err
- }
-
- if !resp.IsError() {
- return &resp.Result().(*bodies.Node).Data, nil
- }
-
- return nil, fmt.Errorf(
- "nodes(%s): has err resp for node details %s: %s",
- h.reqId,
- node.Hostname,
- string(resp.Body()),
- )
+ return node, nil
}
diff --git a/internal/apis/v1/handlers/tunings/parse.go b/internal/apis/v1/handlers/tunings/parse.go
index b9c005b8..5635cfd1 100644
--- a/internal/apis/v1/handlers/tunings/parse.go
+++ b/internal/apis/v1/handlers/tunings/parse.go
@@ -71,14 +71,14 @@ func (h *helper) parseTuningReset() error {
}
h.tuning.Name = name
+ h.tuning.Value = spec.Limitation.Default
+ h.tuning.Enabled = true
+ h.tuning.SetHosts(h.reset.Hosts)
if !h.isTuningModified() {
return errors.New("can't reset unmodified tuning")
}
- h.tuning.Value = spec.Limitation.Default
- h.tuning.Enabled = true
h.tuning.IsModified = false
- h.tuning.SetHosts(h.reset.Hosts)
h.tuning.SetResetting()
return nil
}