Untitled
unknown
plain_text
17 days ago
947 B
4
Indexable
Never
import os
from datetime import datetime

import environment_settings as env
from airflow import DAG
# Operator imports belong at module level with the other imports, not inside
# the DAG context block (moved up; behavior is unchanged).
from airflow.operators.bash import BashOperator
from airflow.operators.dummy import DummyOperator

# Arguments applied to every task in this DAG.
default_args = {
    # f-string kept so a non-str `env.username` is still coerced to str,
    # exactly as the original did — TODO confirm whether username is a str.
    'owner': f'{env.username}',
    'start_date': datetime(2024, 8, 1),
    'retries': 0,
}

with DAG(
    'spark_dag_s7',
    default_args=default_args,
    schedule_interval=None,  # triggered manually/externally only
    max_active_runs=1,       # never run two DAG instances concurrently
    catchup=False,           # no back-filling of past intervals
) as dag:

    # Boundary markers for the pipeline.
    start = DummyOperator(task_id='start')
    end = DummyOperator(task_id='end')

    # Task 1: Load local city geo data to HDFS.
    # The redundant `dag=dag` kwarg was dropped: operators instantiated
    # inside the `with DAG(...)` block attach to the DAG automatically.
    # The f-string prefix was dropped from bash_command: it contained no
    # placeholders, so the string is byte-identical.
    load_city_geo = BashOperator(
        task_id='load_city_geo',
        bash_command=(
            "spark-submit --master local "
            "/lessons/data/scripts/load_city_geo.py "
            "/lessons/data/local_source/cities_geo.csv "
            "/user/bobax/data/ods/geo/cities_geo/"
        ),
    )

    # NOTE(review): no task dependencies (e.g. start >> load_city_geo >> end)
    # are visible in this chunk — presumably declared further down; confirm.
Leave a Comment