import os
from datetime import datetime

import environment_settings as env

from airflow import DAG
from airflow.operators.bash import BashOperator
from airflow.operators.dummy import DummyOperator

default_args = {
    'owner': env.username,
    'start_date': datetime(2024, 8, 1),
    'retries': 0,
}

with DAG(
        'spark_dag_s7',
        default_args=default_args,
        schedule_interval=None,
        max_active_runs=1,
        catchup=False) as dag:

    # Boundary markers around the pipeline
    start = DummyOperator(task_id='start')
    end = DummyOperator(task_id='end')

    # Task 1: Load local city geo data to HDFS via a spark-submit call
    load_city_geo = BashOperator(
        task_id='load_city_geo',
        bash_command=(
            'spark-submit --master local '
            '/lessons/data/scripts/load_city_geo.py '
            '/lessons/data/local_source/cities_geo.csv '
            '/user/bobax/data/ods/geo/cities_geo/'
        )
    )
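    # Assumed wiring: only Task 1 appears in this paste, so chaining it
    # directly between the boundary markers is an assumption about the
    # intended order, not something stated in the original.
    start >> load_city_geo >> end

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original paste): one plausible shape
# for the /lessons/data/scripts/load_city_geo.py job that the BashOperator
# submits. It would read the local CSV given as the first argument and write
# it to the HDFS path given as the second; the header, schema-inference, and
# Parquet choices below are assumptions.
#
#   import sys
#   from pyspark.sql import SparkSession
#
#   def main():
#       input_csv, output_path = sys.argv[1], sys.argv[2]
#       spark = SparkSession.builder.appName('load_city_geo').getOrCreate()
#       # Read the local city geo CSV with a header row and inferred types
#       cities = spark.read.csv(f'file://{input_csv}', header=True, inferSchema=True)
#       # Persist to the ODS geo layer on HDFS
#       cities.write.mode('overwrite').parquet(output_path)
#       spark.stop()
#
#   if __name__ == '__main__':
#       main()
# ---------------------------------------------------------------------------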