Collect Spark metrics by injecting a Spark event listener and push them into the Prometheus TSDB via the Prometheus remote-write API.
# Build the sparkmeasure listener jar (contains the PrometheusSink class).
sbt package
# Bake the listener into a custom Spark image and publish it to the registry.
docker build -f docker/dockerfile -t registry.cn-shanghai.aliyuncs.com/xxx/spark:v3.3-prom .
docker push registry.cn-shanghai.aliyuncs.com/xxx/spark:v3.3-prom
Configuration settings:
# spark-submit option fragment: registers the PrometheusSink event listener,
# points it at the Prometheus remote-write endpoint, and annotates the driver
# pod so Prometheus scrapes the executor-metrics endpoint on the UI port (4040).
# NOTE: no comments may be interleaved below — each line continues the previous
# one via the trailing backslash.
--conf spark.extraListeners=com.hzhangse.sparkmeasure.PrometheusSink \
--conf spark.sparkmeasure.prometheusURL="http://xxx.xxx.xxx.xxx:30003/api/v1/write" \
--conf spark.sparkmeasure.prometheusStagemetrics=true \
--conf spark.metrics.appStatusSource.enabled=true \
--conf spark.ui.prometheus.enabled=true \
--conf spark.kubernetes.driver.annotation.prometheus.io/scrape=true \
--conf spark.kubernetes.driver.annotation.prometheus.io/path=/metrics/executors/prometheus \
--conf spark.kubernetes.driver.annotation.prometheus.io/port=4040 \
--conf "spark.metrics.conf.*.source.jvm.class"="org.apache.spark.metrics.source.JvmSource" \
--conf "spark.driver.extraJavaOptions"="-Divy.cache.dir=/tmp -Divy.home=/tmp" \
--conf "spark.driver.extraClassPath=/opt/spark/plugins/*:/opt/spark/listeners/*:/opt/spark/listeners/lib/*" \
--conf "spark.executor.extraClassPath=/opt/spark/plugins/*"
The complete spark-submit script is shown below:
# Submit SparkPi on Kubernetes (Volcano scheduler) with the PrometheusSink
# listener enabled. Fixes applied to the original snippet:
#   * removed markdown "**" emphasis markers that were fused into the command
#     (they would break the command when copied and run);
#   * removed stray spaces after "=" in spark.kubernetes.driver.master and
#     spark.kubernetes.authenticate.executor.serviceAccountName (the leading
#     space becomes part of the configuration value).
# NOTE(review): spark.kubernetes.driver.master uses a concrete IP
# (10.1.2.61) while --master uses a placeholder — confirm they refer to the
# same API server.
/opt/spark/bin/spark-submit --master k8s://https://xxx.xxx.xxx.xxx:6443 \
--deploy-mode cluster --name hello-0 \
--class "org.apache.spark.examples.SparkPi" \
--driver-memory="500M" --executor-memory="500M" \
--conf "spark.driver.memory=500M" --conf "spark.driver.memoryOverhead=512M" \
--conf "spark.executor.memory=500M" --conf "spark.executor.memoryOverhead=512M" \
--conf "spark.eventLog.enabled=false" \
--conf "spark.master=k8s://https://xxx.xxx.xxx.xxx:6443" \
--conf "spark.app.name=hello-0" \
--conf spark.extraListeners=com.hzhangse.sparkmeasure.PrometheusSink \
--conf spark.sparkmeasure.prometheusURL="http://xxx.xxx.xxx.xxx:30003/api/v1/write" \
--conf spark.sparkmeasure.prometheusStagemetrics=true \
--conf spark.metrics.appStatusSource.enabled=true \
--conf spark.ui.prometheus.enabled=true \
--conf spark.kubernetes.driver.annotation.prometheus.io/scrape=true \
--conf spark.kubernetes.driver.annotation.prometheus.io/path=/metrics/executors/prometheus \
--conf spark.kubernetes.driver.annotation.prometheus.io/port=4040 \
--conf "spark.metrics.conf.*.source.jvm.class"="org.apache.spark.metrics.source.JvmSource" \
--conf "spark.driver.extraJavaOptions"="-Divy.cache.dir=/tmp -Divy.home=/tmp" \
--conf "spark.driver.extraClassPath=/opt/spark/plugins/*:/opt/spark/listeners/*:/opt/spark/listeners/lib/*" \
--conf "spark.executor.extraClassPath=/opt/spark/plugins/*" \
--conf "spark.kubernetes.container.image.pullPolicy=Always" \
--conf "spark.kubernetes.namespace=default" \
--conf "spark.kubernetes.driver.master=https://10.1.2.61:6443" \
--conf "spark.kubernetes.scheduler.name=volcano" \
--conf "spark.kubernetes.scheduler.volcano.podGroupTemplateFile=pg-hello-0.yaml" \
--conf "spark.kubernetes.driver.pod.featureSteps=org.apache.spark.deploy.k8s.features.VolcanoFeatureStep" \
--conf "spark.kubernetes.driver.label.app=hello-0" \
--conf "spark.kubernetes.driver.request.cores=1" \
--conf "spark.kubernetes.driver.limit.cores=1" \
--conf "spark.kubernetes.driver.secrets.apiserver=/opt/pki" \
--conf "spark.kubernetes.executor.deleteOnTermination=true" \
--conf "spark.kubernetes.executor.request.cores=1" \
--conf "spark.kubernetes.executor.limit.cores=1" \
--conf "spark.kubernetes.executor.podNamePrefix=hello-0" \
--conf "spark.kubernetes.executor.scheduler.name=volcano" \
--conf "spark.kubernetes.executor.pod.featureSteps=org.apache.spark.deploy.k8s.features.VolcanoFeatureStep" \
--conf "spark.kubernetes.authenticate.driver.mounted.oauthTokenFile=/opt/pki/token" \
--conf "spark.kubernetes.authenticate.driver.caCertFile=/opt/pki/ca.crt" \
--conf "spark.kubernetes.authenticate.driver.serviceAccountName=my-release-spark" \
--conf "spark.kubernetes.authenticate.driver.oauthTokenFile=/opt/pki/token" \
--conf "spark.kubernetes.authenticate.executor.serviceAccountName=my-release-spark" \
--conf "spark.kubernetes.authenticate.caCertFile=/opt/pki/ca.crt" \
--conf "spark.kubernetes.authenticate.oauthTokenFile=/opt/pki/token" \
--conf "spark.dynamicAllocation.executorIdleTimeout=10s" \
--conf "spark.dynamicAllocation.cachedExecutorIdleTimeout=200s" \
--conf "spark.dynamicAllocation.minExecutors=1" \
--conf "spark.dynamicAllocation.initialExecutors=1" \
--conf "spark.dynamicAllocation.maxExecutors=2" \
--conf "spark.dynamicAllocation.executorAllocationRatio=0.5" \
--conf "spark.dynamicAllocation.enabled=true" \
--conf "spark.dynamicAllocation.shuffleTracking.enabled=true" \
--conf "spark.kubernetes.container.image=registry.cn-shanghai.aliyuncs.com/xxxxx/spark:v3.3-prom" \
local:///opt/spark/examples/jars/spark-examples_2.12-3.3.0-SNAPSHOT.jar 100000