Skip to content

Commit c42be84

Browse files
Initial Commit
1 parent 049167c commit c42be84

40 files changed

+1414
-0
lines changed

SBDL - Starter/.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Default ignored files
/workspace.xml
# JetBrains IDE project metadata
.idea/*
# Compiled Python bytecode from the application library
lib/__pycache__/*
5+

SBDL - Starter/Jenkinsfile

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// Declarative Jenkins pipeline for the SBDL project:
// install deps -> run tests -> package (master/release) -> ship to QA (release) or prod (master).
pipeline {
    agent any

    stages {
        // Install the locked dependency set into the pipenv virtualenv.
        stage('Build') {
            steps {
                sh 'pipenv --python python3 sync'
            }
        }
        // Run the pytest suite inside the virtualenv.
        stage('Test') {
            steps {
                sh 'pipenv run pytest'
            }
        }
        // Bundle the application library; runs for master and release branches only.
        stage('Package') {
            when{
                anyOf{ branch "master" ; branch 'release' }
            }
            steps {
                sh 'zip -r sbdl.zip lib'
            }
        }
        // Copy the bundle plus launch scripts to the QA edge node (release branch only).
        // NOTE(review): private-key path and target host are hardcoded and host-key
        // checking is disabled — consider Jenkins credentials binding / known_hosts.
        stage('Release') {
            when{
                branch 'release'
            }
            steps {
                sh "scp -i /home/prashant/cred/edge-node_key.pem -o 'StrictHostKeyChecking no' -r sbdl.zip log4j.properties sbdl_main.py sbdl_submit.sh conf [email protected]:/home/prashant/sbdl-qa"
            }
        }
        // Copy the bundle plus launch scripts to the prod edge node (master branch only).
        stage('Deploy') {
            when{
                branch 'master'
            }
            steps {
                sh "scp -i /home/prashant/cred/edge-node_key.pem -o 'StrictHostKeyChecking no' -r sbdl.zip log4j.properties sbdl_main.py sbdl_submit.sh conf [email protected]:/home/prashant/sbdl-prod"
            }
        }
    }
}

SBDL - Starter/Pipfile

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Pipenv manifest for the SBDL project.

# Package index: public PyPI, fetched over TLS.
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

# Runtime and test dependencies (unpinned here; Pipfile.lock pins exact versions).
[packages]
pyspark = "*"
pytest = "*"

[dev-packages]

# Interpreter version the virtualenv is built with.
[requires]
python_version = "3.10"
14+

SBDL - Starter/conf/sbdl.conf

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Per-environment application settings for the SBDL job.
# The section matching the job's runtime environment (LOCAL/QA/PROD) is loaded.
[LOCAL]
# Local runs have no Hive metastore, so Hive access is disabled.
enable.hive = false
hive.database = null
kafka.topic = sbdl_kafka_cloud
[QA]
enable.hive = true
hive.database = sbdl_db_qa
kafka.topic = sbdl_kafka_qa
[PROD]
enable.hive = true
hive.database = sbdl_db
kafka.topic = sbdl_kafka
13+

SBDL - Starter/conf/spark.conf

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Per-environment Spark configuration for the SBDL job.
# Keys in the section matching the runtime environment are applied as spark.* settings.
[LOCAL]
spark.app.name = sbdl-local
# Small local footprint: tiny executors and few shuffle partitions.
spark.executor.instances = 2
spark.executor.cores = 1
spark.executor.memory = 1G
spark.sql.shuffle.partitions = 5
[QA]
spark.app.name = sbdl-qa
spark.executor.instances = 2
spark.executor.cores = 1
spark.executor.memory = 4G
spark.sql.shuffle.partitions = 1000
[PROD]
spark.app.name = sbdl
spark.executor.instances = 2
spark.executor.cores = 1
spark.executor.memory = 4G
spark.sql.shuffle.partitions = 1000
19+
20+

SBDL - Starter/lib/Utils.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from pyspark.sql import SparkSession
2+
3+
4+
def get_spark_session(env):
    """Build (or fetch the active) Hive-enabled SparkSession for *env*.

    Args:
        env: Environment name. "LOCAL" gets an in-process two-thread
            master with a file-based log4j configuration; any other value
            leaves master and resource settings to spark-submit.

    Returns:
        A SparkSession with Hive support enabled.
    """
    builder = SparkSession.builder.enableHiveSupport()
    if env == "LOCAL":
        builder = (builder
                   .config('spark.driver.extraJavaOptions',
                           '-Dlog4j.configuration=file:log4j.properties')
                   .master("local[2]"))
    return builder.getOrCreate()
16+
17+

SBDL - Starter/lib/__init__.py

Whitespace-only changes.

SBDL - Starter/lib/logger.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
class Log4j(object):
    """Thin wrapper around the driver JVM's log4j logger named "sbdl"."""

    def __init__(self, spark):
        # Reach through the Py4J gateway into the JVM-side log4j package.
        jvm_log4j = spark._jvm.org.apache.log4j
        self.logger = jvm_log4j.LogManager.getLogger("sbdl")

    def warn(self, message):
        """Log *message* at WARN level."""
        self.logger.warn(message)

    def info(self, message):
        """Log *message* at INFO level."""
        self.logger.info(message)

    def error(self, message):
        """Log *message* at ERROR level."""
        self.logger.error(message)

    def debug(self, message):
        """Log *message* at DEBUG level."""
        self.logger.debug(message)
17+
18+

SBDL - Starter/log4j.properties

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Set everything to be logged to the console
log4j.rootCategory=WARN, console

# define console appender
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.out
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

#application log
# NOTE(review): the "file" appender referenced here is never defined in this file,
# so log4j will report "Could not find value for key log4j.appender.file" at startup.
# Either define a log4j.appender.file.* block or drop ", file" — confirm intent.
log4j.logger.sbdl=INFO, console, file
log4j.additivity.sbdl=false

#define following in Java System
# -Dlog4j.configuration=file:log4j.properties

# Recommendations from Spark template
log4j.logger.org.apache.spark.repl.Main=WARN
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
log4j.logger.org.apache.parquet=ERROR
log4j.logger.parquet=ERROR
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
27+

SBDL - Starter/sbdl_main.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
"""Entry point for the SBDL Spark job: parse CLI args and start a session."""
import sys

from lib import Utils
from lib.logger import Log4j

if __name__ == '__main__':

    # Guard clause: both an environment and a load date are required.
    if len(sys.argv) < 3:
        print("Usage: sbdl {local, qa, prod} {load_date} : Arguments are missing")
        sys.exit(-1)

    # Environment is case-insensitive on the command line.
    job_run_env, load_date = sys.argv[1].upper(), sys.argv[2]

    spark = Utils.get_spark_session(job_run_env)
    logger = Log4j(spark)
    logger.info("Finished creating Spark Session")

0 commit comments

Comments
 (0)