Skip to content

Commit fd8ce88

Browse files
committed
Add: Project + Instrumentation
1 parent bba075a commit fd8ce88

File tree

17 files changed

+1468
-3
lines changed

17 files changed

+1468
-3
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
**/*.pptx
22

3+
**/**/node_modules
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
alertmanager:
2+
alertmanagerSpec:
3+
# Selects Alertmanager configuration based on these labels. Ensure that the Alertmanager configuration has matching labels.
4+
# ✅ Solves error: Misconfigured Alertmanager selectors can lead to missing alert configurations.
5+
# ✅ Solves error: Alertmanager wasn't able to find the applied CRD (kind: AlertmanagerConfig)
6+
alertmanagerConfigSelector:
7+
matchLabels:
8+
release: monitoring
9+
10+
# Sets the number of Alertmanager replicas to 2 for high availability.
11+
# ✅ Solves error: Single replica can cause alerting issues during pod failures.
12+
# ✅ Solves error: Alertmanager Cluster Status is Disabled (GitHub issue)
13+
replicas: 2
14+
15+
# Sets the strategy for matching alerts against AlertmanagerConfig objects. 'None' disables the namespace matcher that the operator otherwise adds to every AlertmanagerConfig route.
16+
# ✅ Solves error: Incorrect matcher strategy can lead to unhandled alert configurations.
17+
# ✅ Solves error: Get rid of namespace matchers when creating AlertManagerConfig (GitHub issue)
18+
alertmanagerConfigMatcherStrategy:
19+
type: None

day-2/readme.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,11 @@ helm repo update
9191
kubectl create ns monitoring
9292
```
9393
```bash
94-
helm install monitoring \
95-
--namespace monitoring \
96-
prometheus-community/kube-prometheus-stack
94+
cd day-2
95+
96+
helm install monitoring prometheus-community/kube-prometheus-stack \
97+
-n monitoring \
98+
-f custom_kube_prometheus_stack.yml
9799
```
98100

99101
### ✅ Step 4: Verify the Installation
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
apiVersion: monitoring.coreos.com/v1alpha1
2+
kind: AlertmanagerConfig
3+
metadata:
4+
name: main-rules-alert-config
5+
namespace: monitoring
6+
labels:
7+
release: monitoring
8+
spec:
9+
route:
10+
repeatInterval: 30m
11+
receiver: 'null'
12+
routes:
13+
- matchers:
14+
- name: alertname
15+
value: HighCpuUsage
16+
receiver: 'send-email'
17+
- matchers:
18+
- name: alertname
19+
value: PodRestart
20+
receiver: 'send-email'
21+
repeatInterval: 5m
22+
receivers:
23+
- name: 'send-email'
24+
emailConfigs:
25+
- to: 'ankitjodhani1903@gmail.com'
26+
from: 'ankitjodhani1903@gmail.com'
27+
sendResolved: false
28+
smarthost: smtp.gmail.com:587
29+
authUsername: 'ankitjodhani1903@gmail.com'
30+
authIdentity: 'ankitjodhani1903@gmail.com'
31+
authPassword:
32+
name: mail-pass
33+
key: gmail-pass
34+
- name: 'null'
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
apiVersion: monitoring.coreos.com/v1
2+
kind: PrometheusRule
3+
metadata:
4+
name: custom-alert-rules
5+
namespace: monitoring
6+
labels:
7+
release: monitoring # if you installed the stack through Helm, this must be the Helm release name; otherwise Prometheus will not pick up this rule
8+
spec:
9+
groups:
10+
- name: custom.rules
11+
rules:
12+
- alert: HighCpuUsage
13+
expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 50
14+
for: 5m
15+
labels:
16+
severity: warning
17+
annotations:
18+
summary: "High CPU usage on instance {{ $labels.instance }}"
19+
description: "CPU usage is above 50% (current value: {{ $value }}%)"
20+
- alert: PodRestart
21+
expr: kube_pod_container_status_restarts_total > 2
22+
for: 0m
23+
labels:
24+
severity: critical
25+
annotations:
26+
summary: "Pod restart detected in namespace {{ $labels.namespace }}"
27+
description: "Pod {{ $labels.pod }} in namespace {{ $labels.namespace }} has restarted {{ $value }} times"
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
apiVersion: v1
2+
kind: Secret
3+
type: Opaque
4+
metadata:
5+
name: mail-pass
6+
namespace: monitoring
7+
labels:
8+
release: monitoring
9+
data:
10+
gmail-pass: <<ENTER_YOUR_APP PASSWORDS_IN_BASE64_ENCODED_FORMAT>>
11+
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
apiVersion: kustomize.config.k8s.io/v1beta1
2+
kind: Kustomization
3+
namespace: monitoring
4+
resources:
5+
- alerts.yml
6+
- email-secrets.yml
7+
- alertmangerconfig.yml
8+
- serviceMonitor.yml
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
apiVersion: monitoring.coreos.com/v1
2+
kind: ServiceMonitor
3+
metadata:
4+
labels:
5+
app: prometheus-service-service-monitor
6+
release: monitoring
7+
name: prometheus-service-service-monitor
8+
namespace: monitoring
9+
spec:
10+
jobLabel: job
11+
endpoints:
12+
- interval: 2s
13+
port: prometheus-service-port
14+
path: /metrics
15+
selector:
16+
matchLabels:
17+
app: prometheus-service
18+
namespaceSelector:
19+
matchNames:
20+
- default
21+

day-4/app-code/Dockerfile

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Base image: Node.js 18 on Alpine.
FROM node:18-alpine

# Set the working directory first so the COPY/RUN steps below are relative to it.
WORKDIR /usr/app

# Copy only the dependency manifests and install before copying the source,
# so the install layer stays cached until package*.json changes.
COPY package*.json ./
RUN npm install

# Copy the application source last; editing index.js no longer invalidates
# the dependency-install layer above.
COPY index.js ./

CMD ["node", "index.js"]

day-4/app-code/index.js

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
const express = require('express');
2+
const morgan = require('morgan');
3+
const pino = require('pino');
4+
const promClient = require('prom-client');
5+
6+
// Express application on which every middleware and route in this file is registered.
const app = express();

// Shared pino logger instance.
const logger = pino();
9+
10+
/**
 * Emits three fixed info-level messages, in order, through the shared logger.
 * Takes no arguments and returns nothing.
 */
const logging = () => {
  const messages = [
    "Here are the logs",
    "Please have a look ",
    "This is just for testing",
  ];
  for (const message of messages) {
    logger.info(message);
  }
};
15+
16+
// Register morgan with its 'common' format as application-level middleware (no path filter).
app.use(morgan('common'))
17+
18+
19+
// Prometheus metrics
20+
// Label names shared by the three HTTP request metrics below.
const HTTP_METRIC_LABEL_NAMES = ['method', 'path', 'status_code'];

// Counter of handled HTTP requests.
const httpRequestCounter = new promClient.Counter({
  name: 'http_requests_total',
  help: 'Total number of HTTP requests',
  labelNames: [...HTTP_METRIC_LABEL_NAMES],
});

// Histogram of request durations.
const requestDurationHistogram = new promClient.Histogram({
  name: 'http_request_duration_seconds',
  help: 'Duration of HTTP requests in seconds',
  labelNames: [...HTTP_METRIC_LABEL_NAMES],
  // Bucket upper bounds, in seconds.
  buckets: [0.1, 0.5, 1, 5, 10],
});

// Summary of request durations.
const requestDurationSummary = new promClient.Summary({
  name: 'http_request_duration_summary_seconds',
  help: 'Summary of the duration of HTTP requests in seconds',
  labelNames: [...HTTP_METRIC_LABEL_NAMES],
  // Quantiles reported by the summary.
  percentiles: [0.5, 0.9, 0.99],
});

// Gauge metric, labelled by HTTP method and status.
const gauge = new promClient.Gauge({
  name: 'node_gauge_example',
  help: 'Example of a gauge tracking async task duration',
  labelNames: ['method', 'status'],
});
48+
49+
// Define an async function that simulates a task taking random time
50+
/**
 * Simulates an asynchronous task that takes a random amount of time.
 *
 * @param {number} [maxSeconds=5] - Upper bound of the random delay, in seconds.
 *   The default of 5 keeps the previous hard-coded behaviour for callers that
 *   pass no argument.
 * @returns {Promise<void>} Resolves with no value once the delay has elapsed.
 */
const simulateAsyncTask = (maxSeconds = 5) => {
  // Math.random() is in [0, 1), so the delay is in [0, maxSeconds) seconds.
  const delayMs = Math.random() * maxSeconds * 1000;
  return new Promise((resolve) => setTimeout(resolve, delayMs));
};
54+
55+
// Turn off the Express 'etag' application setting.
// NOTE(review): presumably so repeated demo requests always get a full response rather than a 304 — confirm.
app.disable('etag');
56+
57+
// Middleware to track metrics
58+
// Records request metrics once per response: increments the request counter
// and observes the elapsed time on both the histogram and the summary.
// Measurement happens on the response 'finish' event so the final status code
// is available.
app.use((req, res, next) => {
  const start = Date.now();
  res.on('finish', () => {
    const duration = (Date.now() - start) / 1000; // Duration in seconds

    // Label by the matched route pattern rather than the raw URL. req.url
    // includes the query string, so every distinct URL would otherwise create
    // a new time series. Requests that matched no route fall back to req.path
    // (URL without the query string).
    const path = req.route ? req.baseUrl + req.route.path : req.path;

    // Build a fresh label object per metric, as the original code did.
    const buildLabels = () => ({
      method: req.method,
      path,
      status_code: res.statusCode, // Actual HTTP status code of the response
    });

    httpRequestCounter.labels(buildLabels()).inc();
    requestDurationHistogram.labels(buildLabels()).observe(duration);
    requestDurationSummary.labels(buildLabels()).observe(duration);
  });
  next();
});
70+
71+
// GET / — responds 200 with a fixed JSON status body.
app.get('/', (req, res) => {
  const body = { status: "🏃- Running" };
  res.status(200).json(body);
});
76+
77+
// GET /healthy — responds 200 with a fixed JSON health payload.
app.get('/healthy', (req, res) => {
  const body = {
    name: "👀 - Obserability 🔥- Abhishek Veeramalla",
    status: "healthy",
  };
  res.status(200).json(body);
});
83+
84+
// GET /serverError — always responds 500 with a fixed JSON error body.
app.get('/serverError', (req, res) => {
  const body = {
    error: " Internal server error",
    statusCode: 500,
  };
  res.status(500).json(body);
});
90+
91+
// GET /notFound — always responds 404 with a fixed JSON error body.
app.get('/notFound', (req, res) => {
  res.status(404).json({
    error: "Not Found",
    // Numeric, to match the `statusCode: 500` returned by /serverError
    // (previously the string "404").
    statusCode: 404,
  });
});
97+
98+
// GET /logs — calls logging() to emit the sample log lines, then responds 200.
app.get('/logs', (req, res) => {
  logging();
  const body = { objective: "To generate logs" };
  res.status(200).json(body);
});
104+
105+
106+
// Simulate a crash by exiting the process with a non-zero exit code
107+
// GET /crash — logs a message and terminates the Node.js process with exit
// code 1. No response is written before the process exits.
app.get('/crash', (_req, _res) => {
  console.log('Intentionally crashing the server...');
  process.exit(1);
});
111+
112+
113+
// Define the /example route
114+
// GET /example — runs the simulated async task, timing it on the example gauge,
// then replies with a plain-text confirmation.
app.get('/example', async (req, res) => {
  // res.statusCode is read before any response is sent, so the `status` label
  // carries the response's initial status code.
  const timerLabels = { method: req.method, status: res.statusCode };
  const stopTimer = gauge.startTimer(timerLabels);

  await simulateAsyncTask();

  // Stop the timer started above once the task has finished.
  stopTimer();
  res.send('Async task completed');
});
120+
121+
// Expose metrics for Prometheus to scrape
122+
// GET /metrics — serialises every metric in the default prom-client registry
// for Prometheus to scrape.
app.get('/metrics', async (req, res) => {
  try {
    // Collect first, so a failure can still be answered with a clean 500.
    const body = await promClient.register.metrics();
    res.set('Content-Type', promClient.register.contentType);
    res.end(body);
  } catch (err) {
    // Without this catch, a rejected metrics() call would be an unhandled
    // rejection inside the async handler and the scrape would hang until timeout.
    console.error('Failed to collect metrics', err);
    res.status(500).end(err instanceof Error ? err.message : String(err));
  }
});
126+
127+
128+
// TCP port the HTTP server listens on.
const PORT = 3000;

// Logs the listening port once the server is listening.
const announceListening = () => {
  console.log(`listening on port ${PORT}`);
};

app.listen(PORT, announceListening);
132+

0 commit comments

Comments
 (0)