Skip to content

hzhangse/SparkEventListener4Measure

Folders and files

Name
Last commit message
Last commit date

Latest commit

 

History

2 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

SparkEventListener4Measure

Collects Spark metrics by injecting a Spark EventListener and pushes them into a Prometheus TSDB using the Prometheus remote-write protocol.

Build and package

   sbt package 
   docker build -f docker/dockerfile -t registry.cn-shanghai.aliyuncs.com/xxx/spark:v3.3-prom .
   docker push registry.cn-shanghai.aliyuncs.com/xxx/spark:v3.3-prom

Scala CLI, Spark 3.x

config setting

 --conf spark.extraListeners=com.hzhangse.sparkmeasure.PrometheusSink     \
 --conf spark.sparkmeasure.prometheusURL="http://xxx.xxx.xxx.xxx:30003/api/v1/write"     \
 --conf spark.sparkmeasure.prometheusStagemetrics=true     \
 --conf spark.metrics.appStatusSource.enabled=true     \
 --conf spark.ui.prometheus.enabled=true \
 --conf spark.kubernetes.driver.annotation.prometheus.io/scrape=true \
 --conf spark.kubernetes.driver.annotation.prometheus.io/path=/metrics/executors/prometheus \
 --conf spark.kubernetes.driver.annotation.prometheus.io/port=4040 \
 --conf "spark.metrics.conf.*.source.jvm.class"="org.apache.spark.metrics.source.JvmSource"     \
 --conf "spark.driver.extraJavaOptions"="-Divy.cache.dir=/tmp -Divy.home=/tmp"    \
 --conf "spark.driver.extraClassPath=/opt/spark/plugins/*:/opt/spark/listeners/*:/opt/spark/listeners/lib/*"   \
 --conf "spark.executor.extraClassPath=/opt/spark/plugins/*"    

A complete, real-world spark-submit script is shown below:

/opt/spark/bin/spark-submit     --master k8s://https://xxx.xxx.xxx.xxx:6443  \
 --deploy-mode cluster  --name hello-0    \
 --class "org.apache.spark.examples.SparkPi"     \
 --driver-memory="500M"       --executor-memory="500M"      \
 --conf "spark.driver.memory=500M"   --conf "spark.driver.memoryOverhead=512M"    \
 --conf "spark.executor.memory=500M"     --conf "spark.executor.memoryOverhead=512M"     \
 --conf "spark.eventLog.enabled=false"      \
 --conf "spark.master=k8s://https://xxx.xxx.xxx.xxx:6443"    \
 --conf "spark.app.name=hello-0"    \
 --conf spark.extraListeners=com.hzhangse.sparkmeasure.PrometheusSink     \
 --conf spark.sparkmeasure.prometheusURL="http://xxx.xxx.xxx.xxx:30003/api/v1/write"     \
 --conf spark.sparkmeasure.prometheusStagemetrics=true     \
 --conf spark.metrics.appStatusSource.enabled=true     \
 --conf spark.ui.prometheus.enabled=true \
 --conf spark.kubernetes.driver.annotation.prometheus.io/scrape=true \
 --conf spark.kubernetes.driver.annotation.prometheus.io/path=/metrics/executors/prometheus \
 --conf spark.kubernetes.driver.annotation.prometheus.io/port=4040 \
 --conf "spark.metrics.conf.*.source.jvm.class"="org.apache.spark.metrics.source.JvmSource"     \
 --conf "spark.driver.extraJavaOptions"="-Divy.cache.dir=/tmp -Divy.home=/tmp"    \
 --conf "spark.driver.extraClassPath=/opt/spark/plugins/*:/opt/spark/listeners/*:/opt/spark/listeners/lib/*"   \
 --conf "spark.executor.extraClassPath=/opt/spark/plugins/*"    \
 --conf "spark.kubernetes.container.image.pullPolicy=Always"     \
 --conf "spark.kubernetes.namespace=default"      \
 --conf "spark.kubernetes.driver.master=https://10.1.2.61:6443"  \
 --conf "spark.kubernetes.scheduler.name=volcano"      \
 --conf "spark.kubernetes.scheduler.volcano.podGroupTemplateFile=pg-hello-0.yaml"    \
 --conf "spark.kubernetes.driver.pod.featureSteps=org.apache.spark.deploy.k8s.features.VolcanoFeatureStep"   \
 --conf "spark.kubernetes.driver.label.app=hello-0"     \
 --conf "spark.kubernetes.driver.request.cores=1"     \
 --conf "spark.kubernetes.driver.limit.cores=1"     \
 --conf "spark.kubernetes.driver.secrets.apiserver=/opt/pki"     \
 --conf "spark.kubernetes.executor.deleteOnTermination=true"    \
 --conf "spark.kubernetes.executor.request.cores=1"    \
 --conf "spark.kubernetes.executor.limit.cores=1"   \
 --conf "spark.kubernetes.executor.podNamePrefix=hello-0"     \
 --conf "spark.kubernetes.executor.scheduler.name=volcano"     \
 --conf "spark.kubernetes.executor.pod.featureSteps=org.apache.spark.deploy.k8s.features.VolcanoFeatureStep"   \
 --conf "spark.kubernetes.authenticate.driver.mounted.oauthTokenFile=/opt/pki/token"     \
 --conf "spark.kubernetes.authenticate.driver.caCertFile=/opt/pki/ca.crt"     \
 --conf "spark.kubernetes.authenticate.driver.serviceAccountName=my-release-spark"   \
 --conf "spark.kubernetes.authenticate.driver.oauthTokenFile=/opt/pki/token"    \
 --conf "spark.kubernetes.authenticate.executor.serviceAccountName=my-release-spark"     \
 --conf "spark.kubernetes.authenticate.caCertFile=/opt/pki/ca.crt"   \
 --conf "spark.kubernetes.authenticate.oauthTokenFile=/opt/pki/token"    \
 --conf "spark.dynamicAllocation.executorIdleTimeout=10s"     \
 --conf "spark.dynamicAllocation.cachedExecutorIdleTimeout=200s"    \
 --conf "spark.dynamicAllocation.minExecutors=1"   \
 --conf "spark.dynamicAllocation.initialExecutors=1"     \
 --conf "spark.dynamicAllocation.maxExecutors=2"     \
 --conf "spark.dynamicAllocation.executorAllocationRatio=0.5"    \
 --conf "spark.dynamicAllocation.enabled=true"     \
 --conf "spark.dynamicAllocation.shuffleTracking.enabled=true"       \
 --conf "spark.kubernetes.container.image=registry.cn-shanghai.aliyuncs.com/xxxxx/spark:v3.3-prom"     \
 local:///opt/spark/examples/jars/spark-examples_2.12-3.3.0-SNAPSHOT.jar 100000

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

 
 
 

Contributors

Languages