# --- pastecode.io paste metadata (commented out so this module parses as Python) ---
# Untitled
# mail@pastecode.io avatar
# unknown
# plain_text
# 17 days ago
# 947 B
# 4
# Indexable
# Never
import os
from datetime import datetime
import environment_settings as env
from airflow import DAG

# Default arguments applied to every task in this DAG.
default_args = {
    # Owner comes straight from the shared environment settings module;
    # no f-string wrapper is needed around an existing string value.
    'owner': env.username,
    'start_date': datetime(2024, 8, 1),
    'retries': 0,  # fail fast: no automatic retries
}

with DAG(
    'spark_dag_s7',
    default_args=default_args,
    schedule_interval=None,  # manually triggered only — no schedule
    max_active_runs=1,       # prevent overlapping runs
    catchup=False,           # do not backfill missed intervals
) as dag:

    from airflow.operators.bash import BashOperator
    from airflow.operators.dummy import DummyOperator

    # Boundary markers for the pipeline.
    start = DummyOperator(task_id='start')
    end = DummyOperator(task_id='end')

    # Task 1: Load local city geo data to HDFS.
    # Note: plain string (the original used an f-string with no placeholders);
    # `dag=` is omitted because the `with DAG(...)` context assigns it.
    load_city_geo = BashOperator(
        task_id='load_city_geo',
        bash_command=(
            "spark-submit --master local "
            "/lessons/data/scripts/load_city_geo.py "
            "/lessons/data/local_source/cities_geo.csv "
            "/user/bobax/data/ods/geo/cities_geo/"
        ),
    )

    # Bug fix: the original declared no dependencies, so start/end were
    # unused and task ordering was undefined. Wire the pipeline explicitly.
    start >> load_city_geo >> end

# Leave a Comment  (pastecode.io page footer, commented out so the module parses)