from datetime import datetime, timedelta
from airflow import DAG
from airflow.operators.python import PythonOperator  # airflow.operators.python_operator is deprecated in Airflow 2
from cassandra.cluster import Cluster
from airflow.hooks.base import BaseHook
import pandas as pd
from cassandra.auth import PlainTextAuthProvider
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
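# Fetch recon summary rows from Cassandra for the given window, aggregate
# them per day and event type, and e-mail the stats as an HTML table.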
def process_recon_results(**kwargs):
recon_start_date = kwargs['recon_start_date']
recon_end_date = kwargs['recon_end_date']
cassandra_conn_id = 'cassandrahost'
cassandra_conn = BaseHook.get_connection(cassandra_conn_id)
    auth_provider = PlainTextAuthProvider(username=cassandra_conn.login, password=cassandra_conn.password)  # read the credentials from the Airflow connection instead of hard-coding them
cluster = Cluster([cassandra_conn.host], auth_provider=auth_provider)
session = cluster.connect(cassandra_conn.schema)
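    # NOTE: ALLOW FILTERING forces a scan; acceptable only while the summary table stays small.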
cassandra_query = f"SELECT * FROM hos_event_recon_summary where recon_start_date>='{recon_start_date}' and recon_end_date<='{recon_end_date}' allow filtering"
print(cassandra_query)
result_set = session.execute(cassandra_query)
    df = pd.DataFrame(result_set, columns=[
        'recon_start_date', 'recon_end_date', 'event_type',
        'no_of_failed_events', 'no_of_failed_events_published_to_kafka',
        'no_of_passed_events', 'no_of_passed_events_via_recon',
        'total_no_of_events',
    ])
df['recon_start_date'] = pd.to_datetime(df['recon_start_date']).dt.date
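    # Aggregate the event counters per recon day and event type.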
result_df = df.groupby(['recon_start_date', 'event_type']).agg({
'total_no_of_events': 'sum',
'no_of_passed_events': 'sum',
'no_of_failed_events': 'sum',
'no_of_passed_events_via_recon': 'sum'
}).reset_index()
    # Build the e-mail with the aggregated stats rendered as an HTML table.
    recipients = ['veerendrakumar.meka@tcs.com', 'ragulp.r@tcs.com']
    msg = MIMEMultipart()
    msg['From'] = 'hobapp_tcs-hob-sir-env01@tcs.com'
    msg['To'] = ', '.join(recipients)
msg['Subject'] = 'Recon stats'
html = result_df.to_html(index=False)
body = MIMEText(html, 'html')
msg.attach(body)
smtp_server = '10.16.16.90'
smtp_port = 25
with smtplib.SMTP(smtp_server, smtp_port) as server:
        server.sendmail(msg['From'], recipients, msg.as_string())  # sendmail needs the list of addresses, not the comma-joined header string
print("Mail Sent")
default_args = {
'owner': 'airflow',
'start_date': datetime(2023, 1, 1),
'retries': 1,
'retry_delay': timedelta(minutes=5),
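    # NOTE: the two keys below are not valid default_args and never reach the
    # task's kwargs; see the sketch at the end of this paste for the fix.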
'recon_start_date': None,
'recon_end_date': None
}
dag = DAG(
'process_recon_results',
default_args=default_args,
    description='DAG for executing the Cassandra query that processes the recon stats',
schedule_interval=timedelta(days=1),
catchup=False,
)
execute_query_task = PythonOperator(
task_id='process_recon_results',
python_callable=process_recon_results,
dag=dag,
)
execute_query_task
In the above code I am not getting an option in the Airflow UI to pass params while triggering the DAG, but with the code below I do get that option. Can you please modify the above code, taking the code below as a reference? (One possible rewrite is sketched after the reference DAG.)
from airflow import DAG
from airflow import AirflowException
from airflow.models import Variable
from airflow.models.xcom import XCom
from airflow.operators.bash_operator import BashOperator
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import BranchPythonOperator
from airflow.operators.python_operator import PythonOperator
from airflow.utils.dates import days_ago
from airflow.utils.edgemodifier import Label
from datetime import datetime
import json
from sys import exc_info as sys_exc_info
from sys import stderr as sys_stderr
#*********************User Defined Functions ********************************************************************************************#
def fn_failed_params_validation(ti, **kwargs):
from airflow.utils.state import TaskInstanceState
dag_run = kwargs['dag_run']
    xcom_key = dag_run.dag_id + '_task_validateParams'
    xcom_error = ti.xcom_pull(task_ids=['task_validateParams'], key=xcom_key)
dag_run.dag.set_task_instance_state(task_id="task_validateParams"
,state=TaskInstanceState.FAILED
,run_id=dag_run.run_id
)
print("XOM_ERROR")
print(xom_error)
def fn_triggerShell(ti, **kwargs):
from subprocess import Popen, PIPE
dag_run = kwargs['dag_run']
    xcom_key = dag_run.dag_id + '_task_validateParams'
    bashCommand = ti.xcom_pull(task_ids=['task_validateParams'], key=xcom_key)
bashCommand = bashCommand[0]
print("bashCommand : %s" % bashCommand)
shell = Popen(bashCommand, stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True)
output, err = shell.communicate()
pid = str(shell.pid)
print("processId : %s" % pid)
print("returncode: %s" % shell.returncode)
print("output : %s" % output)
print("err : %s" % err)
if shell.returncode == 0:
print("Space check script completed")
else:
raise AirflowException("Space check script failed")
    # end if: shell.returncode check
def fn_preCheckOperator(**kwargs):
from configparser import ConfigParser as conf_ConfigParser
from os import environ as os_environ
from os import path as os_path
# Dag Name
print("dag_id from dag:")
dag = kwargs['dag']
dag_name = dag.dag_id
print(dag_name)
print("dag_id from task instance:")
ti = kwargs['task_instance']
dag_name = ti.dag_id
print(dag_name)
print("dag_id from dag_run:")
dag_run = kwargs['dag_run']
dag_name = dag_run.dag_id
print(dag_name)
    xcom_key = dag_name + '_task_validateParams'
xcom_value = "Success"
validation_flag = False
adminVariableValue = {"diskSpaceThreshold": "90", "fileSizeInMB": "1024"}
diskSpaceThreshold = None
fileSizeInMB = None
bashCommand = None
try:
file_path = os_path.realpath(__file__).replace(os_path.basename(__file__),"")
config_file = file_path.replace(".py",".config").replace("/dags/","/config/")
shell_script_name = os_path.basename(__file__).replace(".py",".sh")
print("file_path : %s" % file_path)
print("shell_script_path : %s" % shell_script_name)
print("config_file : %s" % config_file)
# Shell script details
if not os_path.exists(file_path+shell_script_name):
raise Exception("ERROR: SHELL SCRIPT MISSING.")
# Config file
if not os_path.exists(config_file):
raise AirflowException("ERROR: CONFIGURATION FILE MISSING.")
configProperties = conf_ConfigParser()
configProperties.read(config_file)
if kwargs['dag_run'].conf :
adminVariableValue = kwargs['dag_run'].conf
print ('Airflow conf variable value assigned: %s, %s' % (adminVariableValue, type(adminVariableValue)))
elif Variable.get(dag_name, None):
            adminVariableValue = Variable.get(dag_name, None)  # fix: dagIdVar was undefined
            adminVariableValue = json.loads(adminVariableValue)
print ('Airflow UI variable value assigned: %s %s' % (adminVariableValue, type(adminVariableValue)))
elif configProperties.has_option("shellConfiguration", "adminVariableValue"):
adminVariableValue = configProperties.get('shellConfiguration', 'adminVariableValue', raw = True, fallback = None)
adminVariableValue = json.loads(adminVariableValue)
print ('Airflow configuration value assigned: %s %s' % (adminVariableValue, type(adminVariableValue)))
else:
print ('Airflow default values assigned: %s %s' % (adminVariableValue, type(adminVariableValue)))
if adminVariableValue and isinstance(adminVariableValue, dict):
if 'diskSpaceThreshold' in adminVariableValue.keys():
diskSpaceThreshold = adminVariableValue['diskSpaceThreshold']
if 'fileSizeInMB' in adminVariableValue.keys():
fileSizeInMB = adminVariableValue['fileSizeInMB']
else:
raise AirflowException("ERROR: PARAMTER VALUES ERROR. %s, %s" % (adminVariableValue, type(adminVariableValue)))
print("diskSpaceThreshold: %s" % diskSpaceThreshold)
print("fileSizeInMB : %s" % fileSizeInMB)
        bashCommand = (shell_script_name + " " + str(diskSpaceThreshold) + " " + str(fileSizeInMB) + " ")
bashCommand = "cd " + file_path + ";./" + bashCommand
print("bashCommand : %s" % bashCommand)
xcom_value = bashCommand
validation_flag = True
except Exception as errrsdb:
validation_flag = False
exception_type, exception_object, exception_traceback = sys_exc_info()
exception_file_name = exception_traceback.tb_frame.f_code.co_filename
exception_line_number = exception_traceback.tb_lineno
xcom_value = ("%s|ERROR|%s|Line No. %s|%s\n" % (datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3], exception_file_name, exception_line_number, str(errrsdb)))
print(xcom_value)
    ti.xcom_push(key=xcom_key, value=xcom_value)
if validation_flag:
return "task_triggerShellScript"
else:
return "task_validationFailed"
#*********************DAG Defnition ********************************************************************************************#
default_args = {'owner': 'devops'
,'depends_on_past': False
,'start_date': days_ago(2)
}
dag = DAG("devOps_diskSpaceChecker"
,default_args=default_args
,description="devOps_diskSpaceChecker"
,schedule_interval="30 3,11,19 * * *"
,catchup=False
,tags=["devops"]
,params={"diskSpaceThreshold":"72", "fileSizeInMB":"500"}
)
#*********************DAG Tasks********************************************************************************************#
beginProcess = DummyOperator(task_id = 'task_beginProcess'
,dag = dag
)
validationFailedOperator = PythonOperator(task_id='task_validationFailed'
,python_callable = fn_failed_params_validation
,dag=dag
,do_xcom_push=True
)
preCheckOperator = BranchPythonOperator(task_id = 'task_validateParams'
,python_callable = fn_preCheckOperator
,provide_context = True
,dag=dag
)
triggerShellOperator = PythonOperator(task_id='task_triggerShellScript'
,python_callable = fn_triggerShell
,dag=dag
,do_xcom_push=True
)
#*********************DAG Sequence*******************************************************************************************#
beginProcess >> Label("Validate for configuration file, shell script, arguments") >> preCheckOperator
preCheckOperator >> Label("Validation failed") >> validationFailedOperator
preCheckOperator >> Label("Validation success") >> triggerShellOperator