Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .bingo/Variables.mk
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,6 @@ $(DEX): $(BINGO_DIR)/dex.mod
@echo "(re)installing $(GOBIN)/dex-v0.0.0-20200512115545-709d4169d646"
@cd $(BINGO_DIR) && GOWORK=off $(GO) build -mod=mod -modfile=dex.mod -o=$(GOBIN)/dex-v0.0.0-20200512115545-709d4169d646 "github.com/dexidp/dex/cmd/dex"

EMBEDMD := $(GOBIN)/embedmd-v1.0.0
$(EMBEDMD): $(BINGO_DIR)/embedmd.mod
@# Install binary/ries using Go 1.14+ build command. This is using bwplotka/bingo-controlled, separate go module with pinned dependencies.
@echo "(re)installing $(GOBIN)/embedmd-v1.0.0"
@cd $(BINGO_DIR) && GOWORK=off $(GO) build -mod=mod -modfile=embedmd.mod -o=$(GOBIN)/embedmd-v1.0.0 "github.com/campoy/embedmd"

GOJSONTOYAML := $(GOBIN)/gojsontoyaml-v0.0.0-20200602132005-3697ded27e8c
$(GOJSONTOYAML): $(BINGO_DIR)/gojsontoyaml.mod
@# Install binary/ries using Go 1.14+ build command. This is using bwplotka/bingo-controlled, separate go module with pinned dependencies.
Expand Down Expand Up @@ -83,6 +77,12 @@ $(KUBECONFORM): $(BINGO_DIR)/kubeconform.mod
@echo "(re)installing $(GOBIN)/kubeconform-v0.7.0"
@cd $(BINGO_DIR) && GOWORK=off $(GO) build -mod=mod -modfile=kubeconform.mod -o=$(GOBIN)/kubeconform-v0.7.0 "github.com/yannh/kubeconform/cmd/kubeconform"

MDOX := $(GOBIN)/mdox-v0.9.1-0.20250909081353-65d927203516
$(MDOX): $(BINGO_DIR)/mdox.mod
@# Install binary/ries using Go 1.14+ build command. This is using bwplotka/bingo-controlled, separate go module with pinned dependencies.
@echo "(re)installing $(GOBIN)/mdox-v0.9.1-0.20250909081353-65d927203516"
@cd $(BINGO_DIR) && GOWORK=off $(GO) build -mod=mod -modfile=mdox.mod -o=$(GOBIN)/mdox-v0.9.1-0.20250909081353-65d927203516 "github.com/bwplotka/mdox"

OAPI_CODEGEN := $(GOBIN)/oapi-codegen-v2.5.1
$(OAPI_CODEGEN): $(BINGO_DIR)/oapi-codegen.mod
@# Install binary/ries using Go 1.14+ build command. This is using bwplotka/bingo-controlled, separate go module with pinned dependencies.
Expand Down
5 changes: 0 additions & 5 deletions .bingo/embedmd.mod

This file was deleted.

4 changes: 0 additions & 4 deletions .bingo/embedmd.sum

This file was deleted.

5 changes: 5 additions & 0 deletions .bingo/mdox.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
module _ // Auto generated by https://github.com/bwplotka/bingo. DO NOT EDIT

go 1.24.10

require github.com/bwplotka/mdox v0.9.1-0.20250909081353-65d927203516
620 changes: 620 additions & 0 deletions .bingo/mdox.sum

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions .bingo/variables.env
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ BINGO="${GOBIN}/bingo-v0.9.0"

DEX="${GOBIN}/dex-v0.0.0-20200512115545-709d4169d646"

EMBEDMD="${GOBIN}/embedmd-v1.0.0"

GOJSONTOYAML="${GOBIN}/gojsontoyaml-v0.0.0-20200602132005-3697ded27e8c"

GOLANGCI_LINT="${GOBIN}/golangci-lint-v2.7.0"
Expand All @@ -30,6 +28,8 @@ JSONNETFMT="${GOBIN}/jsonnetfmt-v0.21.0"

KUBECONFORM="${GOBIN}/kubeconform-v0.7.0"

MDOX="${GOBIN}/mdox-v0.9.1-0.20250909081353-65d927203516"

OAPI_CODEGEN="${GOBIN}/oapi-codegen-v2.5.1"

OPA="${GOBIN}/opa-v1.5.1"
Expand Down
11 changes: 7 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,16 @@ tmp/help.txt: $(BIN_NAME) $(TMP_DIR)
# Capture the output of `./test/load.sh -h` (stdout and stderr) in
# $(TMP_DIR)/load_help.txt. The leading '-' tells make to ignore a non-zero
# exit status from the script so the rest of the build continues.
tmp/load_help.txt: $(TMP_DIR)
	-./test/load.sh -h > $(TMP_DIR)/load_help.txt 2>&1

README.md: $(EMBEDMD) tmp/help.txt
$(EMBEDMD) -w README.md
README.md: $(MDOX) $(BIN_NAME)
$(MDOX) fmt $@

benchmark.md: $(EMBEDMD) tmp/load_help.txt
.PHONY: docs/benchmark.md
docs/benchmark.md: $(MDOX)
$(MDOX) fmt $@

benchmark:
-rm -rf ./docs/loadtests
PATH=$$PATH:$(BIN_DIR):$(FIRST_GOPATH)/bin ./test/load.sh -r 300 -c 1000 -m 3 -q 10 -o gnuplot
$(EMBEDMD) -w docs/benchmark.md

$(BIN_NAME): deps main.go rules/rules.go $(wildcard *.go) $(wildcard */*.go)
CGO_ENABLED=0 GOOS=$(OS) GOARCH=$(ARCH) GO111MODULE=on GOPROXY=https://proxy.golang.org go build -a -ldflags '-s -w -X main.Version=$(VERSION) -X main.Branch=$(GIT_BRANCH) -X main.Revision=$(GIT_REVISION)' -o $(BIN_NAME) .
Expand Down
31 changes: 10 additions & 21 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
# Observatorium

[![CircleCI](https://circleci.com/gh/observatorium/api.svg?style=svg)](https://circleci.com/gh/observatorium/api)
[![Go Doc](https://godoc.org/github.com/observatorium/api?status.svg)](http://godoc.org/github.com/observatorium/api)
[![Go Report Card](https://goreportcard.com/badge/github.com/observatorium/api)](https://goreportcard.com/report/github.com/observatorium/api)
[![CircleCI](https://circleci.com/gh/observatorium/api.svg?style=svg)](https://circleci.com/gh/observatorium/api) [![Go Doc](https://godoc.org/github.com/observatorium/api?status.svg)](http://godoc.org/github.com/observatorium/api) [![Go Report Card](https://goreportcard.com/badge/github.com/observatorium/api)](https://goreportcard.com/report/github.com/observatorium/api)

This project is an API server for Observatorium.
The API provides an authenticated and authorized, multi-tenant interface for writing and reading observability signals, i.e. metrics and logs.
This project is an API server for Observatorium. The API provides an authenticated and authorized, multi-tenant interface for writing and reading observability signals, i.e. metrics and logs.

## Getting started

Expand All @@ -27,47 +24,39 @@ The Observatorium API server fulfills requests by proxying reads and writes to a

### Metrics

The Observatorium API server can serve read and write requests for Prometheus metrics.
In order to handle requests for metrics, a compatible backend must be configured.
The Observatorium API server can serve read and write requests for Prometheus metrics. In order to handle requests for metrics, a compatible backend must be configured.

#### --metrics.read.endpoint

The backend from which to read metrics can be specified with the `--metrics.read.endpoint` flag.
Compatible backends must implement the Prometheus HTTP API, e.g. Prometheus, Thanos querier, Cortex, etc.
The backend from which to read metrics can be specified with the `--metrics.read.endpoint` flag. Compatible backends must implement the Prometheus HTTP API, e.g. Prometheus, Thanos querier, Cortex, etc.

#### --metrics.write.endpoint

The backend to which to write metrics can be specified with the `--metrics.write.endpoint` flag.
Compatible backends must implement the Prometheus remote-write API, e.g. Thanos receiver, Cortex, etc.
The backend to which to write metrics can be specified with the `--metrics.write.endpoint` flag. Compatible backends must implement the Prometheus remote-write API, e.g. Thanos receiver, Cortex, etc.

#### --metrics.rules.endpoint

The rules backend to where rules can be stored can be specified with the `--metrics.rules.endpoint` flag.

### Logs

The Observatorium API server can serve read and write requests for logs.
In order to handle requests for logs, a compatible backend must be configured.
The Observatorium API server can serve read and write requests for logs. In order to handle requests for logs, a compatible backend must be configured.

#### --logs.read.endpoint

The backend from which to read logs can be specified with the `--logs.read.endpoint` flag.
Compatible backends must implement the Loki read API, e.g. Loki.
The backend from which to read logs can be specified with the `--logs.read.endpoint` flag. Compatible backends must implement the Loki read API, e.g. Loki.

#### --logs.tail.endpoint

The backend from which to tail logs can be specified with the `--logs.tail.endpoint` flag.
Compatible backends must implement the Loki tail API, e.g. Loki.
The backend from which to tail logs can be specified with the `--logs.tail.endpoint` flag. Compatible backends must implement the Loki tail API, e.g. Loki.

#### --logs.write.endpoint

The backend to which to write logs can be specified with the `--logs.write.endpoint` flag.
Compatible backends must implement the Loki write API, e.g. Loki.
The backend to which to write logs can be specified with the `--logs.write.endpoint` flag. Compatible backends must implement the Loki write API, e.g. Loki.

## Usage

[embedmd]:# (tmp/help.txt)
```txt
```txt mdox-exec="./observatorium-api -h"
Usage of ./observatorium-api:
-debug.block-profile-rate int
The percentage of goroutine blocking events that are reported in the blocking profile. (default 10)
Expand Down
58 changes: 36 additions & 22 deletions docs/benchmark.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Benchmark baseline results

This document contains baseline benchmark results for Observatorium API under synthetic load.
This document contains baseline benchmark results for Observatorium API under synthetic load.

Tested on:

Expand All @@ -12,8 +12,7 @@ Tested on:

Generated using:

[embedmd]:# (../tmp/load_help.txt)
```txt
```txt mdox-exec="./test/load.sh -h"
load.sh [-h] [-r n] [-c n] [-m n] [-q n] [-o csv|gnuplot] -- program to test synthetic load on observatorium api and report results.

where:
Expand All @@ -31,33 +30,35 @@ With parameters:
$ ./test/load.sh -r 300 -c 1000 -m 3 -q 10 -o gnuplot
```

> It runs tets for 5 minutes, simulating 3000 machines sending metrics and 10 consumers querying for their data every second.
> Observatorim API GW runs in-front of a mock provider which always responds with a successful response.
> It runs tests for 5 minutes, simulating 3000 machines sending metrics and 10 consumers querying for their data every second. Observatorium API GW runs in front of a mock provider which always responds with a successful response.

## Results

Most relevant results are the ones on resource consumption.
CPU usage is pretty much stable.
Memory usage correlates with the number of goroutines, which correlates the number of open connections.
Memory usage increases and request latencies increase as the backend services' load increase, which is expected.
The most relevant results are the ones on resource consumption. CPU usage is pretty much stable. Memory usage correlates with the number of goroutines, which correlates with the number of open connections. Memory usage and request latencies increase as the backend services' load increases, which is expected.

### Resource consumption

#### CPU Usage

> `rate(process_cpu_seconds_total{job="observatorium"}[1m]) * 1000`
```
rate(process_cpu_seconds_total{job="observatorium"}[1m]) * 1000
```

![./loadtests/cpu.png](./loadtests/cpu.png)

#### Memory Usage

> `process_resident_memory_bytes{job="observatorium"}'`
```
process_resident_memory_bytes{job="observatorium"}
```

![./loadtests/mem.png](./loadtests/mem.png)

#### Number of Goroutines

> go_goroutines{job="observatorium"}'
```
go_goroutines{job="observatorium"}
```

![./loadtests/goroutines.png](./loadtests/goroutines.png)

Expand All @@ -67,41 +68,54 @@ Memory usage increases and request latencies increase as the backend services' l

##### Write P99

> histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job="observatorium", handler="write"}[1m])))'
```
histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job="observatorium", handler="write"}[1m])))
```

![./loadtests/write_dur_99.png](./loadtests/write_dur_99.png)

##### Write P50

> histogram_quantile(0.50, sum by (job, le) (rate(http_request_duration_seconds_bucket{job="observatorium", handler="write"}[1m])))'
```
histogram_quantile(0.50, sum by (job, le) (rate(http_request_duration_seconds_bucket{job="observatorium", handler="write"}[1m])))
```

![./loadtests/write_dur_50.png](./loadtests/write_dur_50.png)

##### Write Average

> 100 * (sum by (job) (rate(http_request_duration_seconds_sum{job="observatorium", handler="write"}[1m])) * 100
> /
> sum by (job) (rate(http_request_duration_seconds_count{job="observatorium", handler="write"}[1m])))'
```
100 * (sum by (job) (rate(http_request_duration_seconds_sum{job="observatorium", handler="write"}[1m])) * 100
/
sum by (job) (rate(http_request_duration_seconds_count{job="observatorium", handler="write"}[1m])))
```

![./loadtests/write_dur_avg.png](./loadtests/write_dur_avg.png)

#### Query Range Latency Quartiles

##### Query P99

> histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job="observatorium", handler="query_range"}[1m])))'
```
histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job="observatorium", handler="query_range"}[1m])))
```

![./loadtests/query_range_dur_99.png](./loadtests/query_range_dur_99.png)

##### Query P50

> histogram_quantile(0.50, sum by (job, le) (rate(http_request_duration_seconds_bucket{job="observatorium", handler="query_range"}[1m])))'
```
histogram_quantile(0.50, sum by (job, le) (rate(http_request_duration_seconds_bucket{job="observatorium", handler="query_range"}[1m])))
```

![./loadtests/query_range_dur_50.png](./loadtests/query_range_dur_50.png)

##### Query Average
> 100 * (sum by (job) (rate(http_request_duration_seconds_sum{job="observatorium", handler="query_range"}[1m]))
> /
> sum by (job) (rate(http_request_duration_seconds_count{job="observatorium", handler="query_range"}[1m])))'

```
100 * (sum by (job) (rate(http_request_duration_seconds_sum{job="observatorium", handler="query_range"}[1m]))
/
sum by (job) (rate(http_request_duration_seconds_count{job="observatorium", handler="query_range"}[1m])))
```

![./loadtests/query_range_dur_avg.png](./loadtests/query_range_dur_avg.png)
28 changes: 18 additions & 10 deletions test/load.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ PROMETHEUS=${PROMETHEUS:-prometheus}
MOCKPROVIDER=${MOCKPROVIDER:-mockprovider}
PROMREMOTEBENCH=${PROMREMOTEBENCH:-promremotebench}

trap 'kill $(jobs -p); exit 0' EXIT

generate_report() {
printf "\tGenerating report...\n"

Expand Down Expand Up @@ -67,9 +65,8 @@ plot() {
done
}

# ---

(
run_observatorium() {
(
# In order to collect process metrics, it needs to run in container. os x doesn't support it.
platform="$(uname -s | tr '[:upper:]' '[:lower:]')"
case $platform in
Expand All @@ -91,19 +88,24 @@ plot() {
echo "unknown platform: $platform"
;;
esac
) &
) &
}

(
run_mock() {
(
$MOCKPROVIDER \
--listen=0.0.0.0:8888
) &
) &
}

(
run_prometheus() {
(
$PROMETHEUS \
--log.level=warn \
--config.file=./test/config/prometheus.yml \
--storage.tsdb.path="$(mktemp -d)"
) &
) &
}

usage="$(basename "$0") [-h] [-r n] [-c n] [-m n] [-q n] [-o csv|gnuplot] -- program to test synthetic load on observatorium api and report results.

Expand Down Expand Up @@ -160,6 +162,12 @@ while getopts "h?o:r:c:m:q:" opt; do
done
shift $((OPTIND - 1))

trap 'kill $(jobs -p); exit 0' EXIT

run_observatorium
run_mock
run_prometheus

# Total reported in the banner: number_of_clusters multiplied by number_of_machines.
hosts=$((number_of_clusters * number_of_machines))
# printf takes whitespace-separated arguments; C-style commas would be printed
# literally (after the format string and after the first value).
printf "\tStarting with %s clusters, will run for %s.\n" "$hosts" "$run_for"

Expand Down