code
code
import concurrent.futures
import os
import glob
import zipfile
import sys
import pytz
# Point the process at the Spark installation and the Python interpreter
# that PySpark should use.
os.environ['SPARK_HOME'] = '/opt/mapr/spark/spark'
os.environ['PYSPARK_PYTHON'] = '/opt/python/python37/bin/python'
# Directory holding Spark's Python sources, derived from SPARK_HOME set above.
spark_python = os.path.join(os.environ.get('SPARK_HOME', None), 'python')
# First py4j archive found under <SPARK_HOME>/python/lib; raises IndexError
# when no file matches the glob.
py4j = glob.glob(os.path.join(spark_python, 'lib', 'py4j-*.zip'))[0]
# Prepend both locations to the module search path (presumably so that
# pyspark/py4j can be imported later in the file - confirm against the
# imports not visible in this chunk).
sys.path[:0] = [spark_python, py4j]
os.environ['PYTHONPATH'] = py4j
# NOTE(review): the unit and the consumer of this limit are not visible in
# this chunk - confirm before changing.
RESPONSE_LIMIT = 200000
class KriExecutor:
def __init__(self, app_name, property_file, db_conf, spark_conf='SparkConf'):
    """Load configuration, then create the Spark session and database handles.

    :param app_name: name handed to ``SparkSessionBuilder``.
    :param property_file: file name joined onto ``CONFIG_DIR`` and passed
        to ``ConfigPropertyParser``.
    :param db_conf: config section name passed to ``Database``.
    :param spark_conf: config section name passed to
        ``create_spark_session``.
    """
    # The parser is instantiated for its side effect only; it must run
    # before anything below reads configuration.
    property_path = os.path.join(CONFIG_DIR, property_file)
    ConfigPropertyParser(property_path)

    session_builder = SparkSessionBuilder(app_name)
    self.spark = session_builder.create_spark_session(conf_path=spark_conf)

    self.database = Database(conf_path=db_conf)
    # Failure messages collected while executing; read by
    # exit_on_concurrent_status.
    self.concurrent_exc_future = []
    self.columns_filter = ColumnsFilter(database=self.database)
def get_spark_queries(self):
with self.database.get_session() as session:
Lookup.get_lookup_ids(session)
# filter out valid kris scheduled/submitted within timeDelta
time_delta = ConfigPropertyParser.get_property('JobConf', 'timeDelta')
valid_submit_sch_kri_dt = session.query(KriDetail) \
.filter(func.coalesce(KriDetail.kri_query_tx, '') != '') \
.filter(KriDetail.kri_sta_cd_id == Lookup.lookup_dict['Approved'],
~KriDetail.exec_sched_regex_tx.is_(None)) \
.all()
zip_file_name =
f'test_result_{spark_kri.kri_sk_id}_{str(datetime.now(MST).strftime("%Y-%m-
%d"))}.csv'
file_name = FILE_SAVE_LOCATION % (str(spark_kri.kri_exec_id),
str(spark_kri.kri_sk_id))
result_df_string =
result_df.fillna('').repartition(1).toPandas().to_csv(
header=True, index=False, quotechar='"',
doublequote=True, line_terminator='\n')
os.path.getsize(file_name)))
"""
result_df.fillna('').repartition(1).toPandas().to_csv(
path_or_buf=FILE_SAVE_LOCATION %
str(spark_kri.kri_exec_id),
header=True, index=False, quotechar='"',
compression={'method': 'zip',
'compresslevel': 1,
'archive_name': file_name},
doublequote=True, line_terminator='\n')
"""
except CapturedException as ex:
exception_str = 'Error at spark level for kri_exec %s -> %s' %
(spark_kri.kri_exec_id, str(ex.desc))
except Py4JError as ex:
# To get spark java error part as message
if isinstance(ex, Py4JJavaError):
err_msg = ex.errmsg
ex = str(ex)
idx = ex.find('Caused by:')
if idx != -1:
ex = ex[idx:]
idx = ex.find("\n\t")
ex = "{0}:{1}".format(err_msg, ex[:idx])
else:
ex = err_msg
else:
ex = ex.cause
exception_str = 'Py4jError for kri_exec %s|%s -> %s' %
(spark_kri.kri_sk_id, spark_kri.kri_exec_id, str(ex))
except Exception as ex:
exception_str = 'Unknown Exception for kri_exec %s|%s -> %s' %
(spark_kri.kri_sk_id, spark_kri.kri_exec_id, str(ex))
else:
# success
spark_kri.update_state(exec_end_ts=datetime.now(MST),
exec_sta_cd=Lookup.lookup_dict['SUCCESS'],
kri_test_fail_in=cnt > 0,
test_ent_fail_ct=cnt) \
.change_state_event(session, 'SUCCESS')
finally:
# print(exception_str)
if exception_str:
# failure
spark_kri.update_state(exec_end_ts=datetime.now(MST),
exec_sta_cd=Lookup.lookup_dict['FAILED'],
err_msg_tx=str(exception_str),
err_log_path_tx='') \
.change_state_event(session, 'FAILED')
return spark_kri
def execute(self):
spark_query_list = self.get_spark_queries()
exec_sta_cd=Lookup.lookup_dict['FAILED'],
err_msg_tx=str(ex),
err_log_path_tx='') \
.change_state_event(session, 'FAILED')
failure_msg = 'Error at main for kri_exec %s -> %s' %
(spark_kri.kri_exec_id, str(ex))
self.concurrent_exc_future.append(failure_msg)
def exit_on_concurrent_status(self):
if self.concurrent_exc_future:
print(*self.concurrent_exc_future, sep='\n')
exit(1)
exit(0)
# Script entry point: build the executor from config.prop / PostgresConf and
# run it.
if __name__ == '__main__':
KriExecutor(app_name='ict_kri_exec', property_file='config.prop',
db_conf='PostgresConf').execute()
scripts/config_parser.py
This is config_parser.py code:
import configparser
class ConfigPropertyParser:
    """Class-level accessor for parsed configuration sections.

    All state lives on the class, so values are shared process-wide.
    NOTE(review): the code that fills ``__section_dict`` is not visible in
    this chunk; the accessors below assume it has already been populated
    with a mapping of section name -> mapping of option -> value.
    """

    __config = configparser.RawConfigParser()
    # Keep option names exactly as written; the parser's default
    # ``optionxform`` lower-cases them.
    __config.optionxform = str
    __section_dict = None

    @classmethod
    def get_items(cls, conf_path):
        """Return the mapping stored for section ``conf_path``.

        :param conf_path: section name.
        :return: the section's mapping, or ``None`` if the section is
            unknown.
        """
        return cls.__section_dict.get(conf_path)

    @classmethod
    def get_property(cls, conf_path, conf_property, default=None):
        """Return one option value from a section.

        :param conf_path: section name.
        :param conf_property: option name inside the section.
        :param default: value returned when the option - or the whole
            section - is missing.
        :return: the stored value, or ``default``.
        """
        section = cls.__section_dict.get(conf_path)
        if section is None:
            # An unknown section used to surface as AttributeError on None;
            # honour the caller's default instead.
            return default
        return section.get(conf_property, default)
scripts/pii_columns_management.py
This is pii_columns_management.py file:
import json
import os
import requests
import re
class ColumnsFilter:
try:
cdm_response = requests.post(self.__cdm_api_url, headers={"Content-
Type": "application/json"},
data=json.dumps(cdm_request),
verify=os.environ.get('TLS_GSO_CERT_PATH'))
cdm_response = json.loads(json.dumps(cdm_response.json()))['response']
[0]['result_list']
return list(set(map(lambda role: role['role_id'], cdm_response)))
except Exception as err:
print(err)
return []
@staticmethod
def extract_columns_frm_query(query):
find_columns_regex = re.compile(r'Select(?s)(.*?)FROM.*', re.IGNORECASE)
return find_columns_regex.search(query).group(1).split(',')
@staticmethod
def get_tb_nm(query):
find_tb_nm = re.compile(r'from\s+(\w+)', re.IGNORECASE)
return find_tb_nm.search(query).group(1)
@staticmethod
def update_columns_in_query(query, updated_cols):
find_columns_regex = re.compile(r'Select(?s)(.*?)FROM.*', re.IGNORECASE)
return query.replace(find_columns_regex.search(query).group(1), ' ' + ',
'.join(updated_cols) + ' ')
@staticmethod
def get_accessible_columns(col_dtl_list, query_cols, pii_roles):
accessible_cols = set([])
for col_name in col_dtl_list:
if '*' in query_cols or col_name['col_name'] in query_cols:
if col_name['pii_role_id'] in pii_roles or col_name['pii_role_id']
is None:
accessible_cols.add(col_name['col_name'])
return accessible_cols
scripts/smtp_trigger.py
This is smtp_trigger.py file:
import mimetypes
import smtplib
from pytz import timezone
from datetime import datetime
from email import encoders
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
# Timezone passed to datetime.now() when the attachment file name is stamped
# with the current date.
MST = timezone('MST')
# Arguments given to the @retry decorator on TriggerEmail.send_email:
# maximum number of attempts and the fixed wait between attempts
# (presumably milliseconds, as in the `retrying` package - confirm which
# `retry` this module imports).
MAX_ATTEMPT = 3
FIXED_WAIT = 10000
class TriggerEmail:
__sub_success_str = 'Result for ICT test script {kri_nm} execution'
__success_html_body = """\
<html>
<head></head>
<body>
<p>Hi there,</p>
<p>Please find your Test script result in the email
attachment.</p>
<div>
<p>With regards,<br>Team ICT</p>
</div>
</body>
</html>
"""
__error_html_body = """\
<html>
<head></head>
<body>
<p>Hi there,</p>
<p>Your test script execution has been failed, please reach out
to admin or visit us to our APCOT website.
</p>
<div>
<p>With regards,<br>Team ICT</p>
</div>
</body>
</html>
"""
__sub_error_str = 'Failed for ICT test script {kri_nm} execution'
@classmethod
def set_mail_params(cls, **mail_params):
cls.MAIL_PARAMS.update(mail_params)
cls.MAIL_PARAMS['port'] = mail_params.get('port', 465 if
mail_params.get('SSL', False) else 25)
return cls
def initialize_email(self):
self.__msg["From"] = TriggerEmail.MAIL_PARAMS.get('from_email')
self.__msg["To"] = self.__to_email
subject = (TriggerEmail.__sub_error_str if not self.__passed else
TriggerEmail.__sub_success_str) \
.format(kri_nm=self.__kri_nm)
if TriggerEmail.MAIL_PARAMS.get('secure'):
subject = "SecloreSecure " + subject
self.__msg.preamble = subject
self.__msg["Subject"] = subject
return self
def create_attachment(self):
if not self.__passed:
body = TriggerEmail.__error_html_body
else:
body = TriggerEmail.__success_html_body
content_type, encoding = mimetypes.guess_type(self.__file_to_attach)
if content_type is None or encoding is not None:
content_type = "application/octet-stream"
filename=f"test_result_{self.__kri_sk_id}_{str(datetime.now(MST).strftime('%Y-%m-
%d'))}.csv.zip")
attachment.add_header("X-seclore-encrypt", 'true')
self.__msg.attach(attachment)
self.__msg.attach(MIMEText(body, 'html'))
return self
@retry(stop_max_attempt_number=MAX_ATTEMPT, wait_fixed=FIXED_WAIT)
def send_email(self):
    """Build the message and send it through the configured SMTP server.

    Reads ``server_address``, ``from_email`` and ``port`` from
    ``TriggerEmail.MAIL_PARAMS``. The ``@retry`` decorator re-invokes the
    whole method when it raises, using MAX_ATTEMPT / FIXED_WAIT.

    :raises AssertionError: if ``server_address`` or ``from_email`` is not
        configured.
    :raises smtplib.SMTPException: if the SMTP exchange fails.
    :raises Exception: for any other failure while building or sending.
    """
    try:
        host = TriggerEmail.MAIL_PARAMS.get('server_address')
        from_email = TriggerEmail.MAIL_PARAMS.get('from_email')
        port = TriggerEmail.MAIL_PARAMS.get('port')
        # Raised explicitly (same exception type as before) because
        # ``assert`` statements are stripped when Python runs with -O.
        if host is None or from_email is None:
            raise AssertionError('server_address and from_email must be set')
        self.initialize_email().create_attachment()
        # Context manager issues QUIT and closes the socket even when
        # sendmail raises, so a failed attempt does not leak a connection.
        with smtplib.SMTP(host=host, port=port) as smtp_obj:
            smtp_obj.sendmail(from_email, self.__to_email,
                              self.__msg.as_string())
    except AssertionError:
        print('Error: Call class method set_mail_params & then create instance')
        raise
    except smtplib.SMTPException as err:
        raise smtplib.SMTPException(
            f'Unable to send email for kri {self.__kri_sk_id}-> {str(err)}'
        ) from err
    except Exception as exc:
        raise Exception('Failed to trigger email for kri %s-> %s' %
                        (self.__kri_sk_id, exc)) from exc
    else:
        print('Successfully sent email for kri %s' % self.__kri_sk_id)
scripts/spark_session.py
This is spark_session.py file:
import pyspark
import pyspark.sql
from pyspark.sql import SparkSession
from retrying import retry
# Default retry policy for SparkSessionBuilder.create_spark_session:
# maximum number of attempts and the fixed wait between attempts
# (`retrying` documents wait_fixed in milliseconds).
MAX_ATTEMPT = 3
FIXED_WAIT = 10000
class SparkSessionBuilder:
    """Create a Hive-enabled SparkSession from a configuration section."""

    def __init__(self, app_name, stop_max_attempt_number=MAX_ATTEMPT,
                 wait_fixed=FIXED_WAIT):
        """Remember the application name and the retry policy.

        :param app_name: Spark application name.
        :param stop_max_attempt_number: maximum number of attempts made by
            ``create_spark_session``.
        :param wait_fixed: fixed wait between attempts, passed to ``retry``.
        """
        # Kept on the instance: writing these to the module globals had no
        # effect, because a method decorator captures its arguments when
        # the class body is executed, before any instance exists.
        self._stop_max_attempt_number = stop_max_attempt_number
        self._wait_fixed = wait_fixed
        self.app_name = app_name
        self.spark = None

    def create_spark_session(self, conf_path='SparkConf', log_level='ERROR'):
        """Build (or fetch) the SparkSession, retrying on failure.

        :param conf_path: config section whose items are applied to the
            ``SparkConf``.
        :param log_level: level set on the Spark context after creation.
        :return: the ``SparkSession``; also stored on ``self.spark``.
        """

        # retry is applied per call so this instance's policy is honoured.
        @retry(stop_max_attempt_number=self._stop_max_attempt_number,
               wait_fixed=self._wait_fixed)
        def build_session():
            spark_conf = list(ConfigPropertyParser.get_items(conf_path).items())
            conf = pyspark.SparkConf().setAll(spark_conf)
            self.spark = SparkSession.builder \
                .appName(self.app_name) \
                .config(conf=conf) \
                .enableHiveSupport() \
                .getOrCreate()
            # print(self.spark.catalog.listDatabases())
            self.spark.sparkContext.setLogLevel(log_level)
            print('Spark session created')
            return self.spark

        return build_session()
alchemy_model/ict_model.py
This is ict_model.py file:
import contextlib
import uuid
from datetime import datetime
from typing import Iterator
import sqlalchemy as db
from croniter import croniter
from sqlalchemy import Column, Integer, Boolean, ForeignKey, BigInteger, Sequence
from sqlalchemy.sql import func
from sqlalchemy.dialects.postgresql import UUID, TIMESTAMP, VARCHAR, JSON
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session, sessionmaker, scoped_session
from sqlalchemy import exc
# Declarative base class that the ORM model classes in this module inherit from.
Base = declarative_base()
class Lookup(Base):
    """ORM mapping for the ``ict.lookup`` table, plus a class-level id cache."""

    __tablename__ = 'lookup'
    __table_args__ = {'schema': 'ict'}
    # Shared cache of lookup_val_tx -> lookup_id, filled by get_lookup_ids.
    lookup_dict = {}
    lookup_id = Column(Integer, Sequence('lookup_lookup_id_seq'), primary_key=True)
    lookup_val_tx = Column(VARCHAR, nullable=False)
    lookup_type_cd = Column(VARCHAR, nullable=False)

    @staticmethod
    def get_lookup_ids(session):
        """Load the ids of all ``kriExec`` rows and of ``Approved`` into the cache.

        :param session: SQLAlchemy session used for the query.
        """
        wanted_rows = (Lookup.lookup_type_cd == 'kriExec') | \
                      (Lookup.lookup_val_tx == 'Approved')
        value_id_pairs = session.query(Lookup) \
            .with_entities(Lookup.lookup_val_tx, Lookup.lookup_id) \
            .filter(wanted_rows) \
            .all()
        Lookup.lookup_dict.update(
            (value_tx, lookup_id) for value_tx, lookup_id in value_id_pairs)

    def __repr__(self):
        fields = (self.lookup_id, self.lookup_val_tx, self.lookup_type_cd)
        return "%s \t%s \t%s\n" % fields
class KriDetail(Base):
    """ORM mapping for the ``ict.kri_dtl`` table."""

    __tablename__ = 'kri_dtl'
    __table_args__ = {'schema': 'ict'}
    kri_sk_id: Column = Column(Integer, Sequence('kri_dtl_kri_sk_id_seq'),
                               primary_key=True)
    kri_nm = Column(VARCHAR, default='', nullable=True)
    kri_query_tx = Column(VARCHAR, nullable=False)
    exec_sched_regex_tx = Column(VARCHAR, nullable=True)
    # Holds a Lookup.lookup_id value (it is filtered against
    # Lookup.lookup_dict entries, which map to lookup_id), so the foreign
    # key targets the integer primary key rather than the VARCHAR
    # lookup_type_cd column.
    kri_sta_cd_id = Column(Integer, ForeignKey(Lookup.lookup_id),
                           nullable=False)
    # TODO: Write email validator via regex and tag it to the column
    # Attribute is exposed as ``to_email`` but stored in column
    # ``kri_creat_user_nm``.
    to_email = Column(VARCHAR, name='kri_creat_user_nm', nullable=True)

    def __repr__(self):
        return "%s \t%s \t%s \t%s \t%s\n" % (self.kri_sk_id, self.kri_query_tx,
                                             self.kri_sta_cd_id,
                                             self.exec_sched_regex_tx,
                                             self.to_email)
class KriExec(Base):
    """ORM mapping for the ``ict.kri_exec`` table."""

    __tablename__ = 'kri_exec'
    __table_args__ = {'schema': 'ict'}
    # Client-side default: a fresh uuid4 per new row.
    kri_exec_id = Column(UUID(as_uuid=True),
                         primary_key=True,
                         default=uuid.uuid4,
                         unique=True)
    kri_sk_id = Column(Integer, ForeignKey(KriDetail.kri_sk_id))
    # Holds a Lookup.lookup_id value (callers assign Lookup.lookup_dict
    # entries such as 'SUCCESS' / 'FAILED'), so the foreign key targets the
    # integer primary key rather than the VARCHAR lookup_type_cd column.
    exec_sta_cd = Column(Integer, ForeignKey(Lookup.lookup_id))
    exec_strt_ts = Column(TIMESTAMP(timezone=True), nullable=True)
    exec_end_ts = Column(TIMESTAMP(timezone=True), nullable=True)
    err_msg_tx = Column(VARCHAR)
    err_log_path_tx = Column(VARCHAR)
    kri_test_fail_in = Column(Boolean)
    test_ent_fail_ct = Column(BigInteger)

    def __repr__(self):
        return "%s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\n" % (
            self.kri_exec_id, self.kri_sk_id, self.exec_sta_cd,
            self.exec_strt_ts, self.exec_end_ts, self.err_msg_tx,
            self.err_log_path_tx, self.kri_test_fail_in,
            self.test_ent_fail_ct)
class SparkKri(KriExec):
    """``KriExec`` row with helpers that map execution states to column updates."""

    __valid_kwargs = {'kri_exec_id', 'kri_sk_id', 'exec_sta_cd', 'exec_strt_ts',
                      'exec_end_ts', 'err_msg_tx', 'err_log_path_tx',
                      'kri_test_fail_in', 'test_ent_fail_ct'}

    @staticmethod
    def validate_query(query):
        """Return ``query`` stripped of surrounding whitespace; falsy input is returned as-is."""
        # to add more validations
        return query.strip() if query else query

    def _column_values(self, *attr_names):
        """Map each named ``KriExec`` column to this instance's current value."""
        return {getattr(KriExec, attr): getattr(self, attr)
                for attr in attr_names}

    def inprogress_mapping(self):
        """Columns written when an execution starts."""
        return self._column_values('exec_strt_ts', 'exec_sta_cd')

    def success_mapping(self):
        """Columns written when an execution succeeds."""
        return self._column_values('exec_end_ts', 'exec_sta_cd',
                                   'kri_test_fail_in', 'test_ent_fail_ct')

    def failure_mapping(self):
        """Columns written when an execution fails."""
        return self._column_values('exec_end_ts', 'exec_sta_cd',
                                   'err_msg_tx', 'err_log_path_tx')

    @property
    def mapping_dispatcher(self):
        """State name -> bound method producing that state's column mapping."""
        dispatch = {}
        dispatch['IN_PROGRESS'] = self.inprogress_mapping
        dispatch['SUCCESS'] = self.success_mapping
        dispatch['FAILED'] = self.failure_mapping
        return dispatch
class TableConfig(Base):
    """ORM mapping for the ``ict.kri_query_tbl_config`` table."""

    __tablename__ = 'kri_query_tbl_config'
    __table_args__ = {'schema': 'ict'}
    lookup_dict = {}
    tbl_nm = Column(VARCHAR, primary_key=True)
    clmn_config_json_tx = Column(JSON, nullable=False)

    @staticmethod
    def get_columns_dtl(session, tb_nm):
        """Return the expanded column-config array elements for one table.

        :param session: SQLAlchemy session used for the query.
        :param tb_nm: value matched against ``tbl_nm``.
        :return: list of result rows, one per element produced by
            ``jsonb_array_elements`` over ``clmn_config_json_tx``.
        """
        config_elements = func.jsonb_array_elements(
            TableConfig.clmn_config_json_tx)
        table_rows = session.query(config_elements) \
            .filter(TableConfig.tbl_nm == tb_nm)
        return table_rows.all()
class Database:
    """Engine and scoped-session factory built from a configuration section."""

    def __init__(self, conf_path='PostgresConf'):
        """Create the engine and session registry.

        :param conf_path: config section whose items are passed as keyword
            arguments to the SQLAlchemy ``URL`` constructor.
        """
        creds_dict = ConfigPropertyParser.get_items(conf_path)
        url = db.engine.url.URL(**creds_dict)
        # pool_pre_ping tests each pooled connection before it is handed out.
        self.engine = db.create_engine(url, pool_pre_ping=True)
        self.session = scoped_session(sessionmaker(bind=self.engine))

    @contextlib.contextmanager
    def get_session(self) -> Iterator[Session]:
        """Yield a session; roll back on any error and always close it.

        No commit is issued here - committing is left to the code running
        inside the ``with`` block.

        :raises sqlalchemy.exc.SQLAlchemyError: wrapping any SQLAlchemy
            error raised inside the block (original kept as ``__cause__``).
        """
        session: Session = self.session()
        try:
            yield session
        except db.exc.SQLAlchemyError as ex:
            session.rollback()
            # Chain explicitly so the original error type and traceback
            # remain reachable from the wrapper.
            raise db.exc.SQLAlchemyError(
                f'Error at pgsql ops and session -> {str(ex)}, updates has been rolled back'
            ) from ex
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()