Untitled
unknown
plain_text
19 days ago
2.7 kB
3
Indexable
%%time # Define initial parameters p1 = 10000000 # rows to table p2 = 1337 # random seed for np BIG_DAG = { "activities": [ # Initial DAG: NB_1 and NB_2 in parallel, then NB_3 after NB_1 { "name": "Initial_NB_1", "path": "NB_1_final", "timeoutPerCellInSeconds": 120, "args": {"p1": p1, "p2": p2} }, { "name": "Initial_NB_2", "path": "NB_2_final", "timeoutPerCellInSeconds": 120, "args": {"p1": p1, "p2": p2}, "retry": 1, "retryIntervalInSeconds": 10 }, { "name": "Initial_NB_3", "path": "NB_3_final", "timeoutPerCellInSeconds": 120, "dependencies": ["Initial_NB_1"], "args": {"p1": p1, "p2": p2}, "retry": 1, "retryIntervalInSeconds": 10 }, # Then run NB_1 twice { "name": "NB_1_second", "path": "NB_1_final", "timeoutPerCellInSeconds": 120, "dependencies": ["Initial_NB_3"], "args": {"p1": p1, "p2": p2} }, { "name": "NB_1_third", "path": "NB_1_final", "timeoutPerCellInSeconds": 120, "dependencies": ["NB_1_second"], "args": {"p1": p1, "p2": p2} }, # Run the initial DAG again as a sub-process: { "name": "Second_DAG_NB_1", "path": "NB_1_final", "timeoutPerCellInSeconds": 120, "dependencies": ["NB_1_third"], "args": {"p1": p1, "p2": p2} }, { "name": "Second_DAG_NB_2", "path": "NB_2_final", "timeoutPerCellInSeconds": 120, "dependencies": ["NB_1_third"], "args": {"p1": p1, "p2": p2}, "retry": 1, "retryIntervalInSeconds": 10 }, { "name": "Second_DAG_NB_3", "path": "NB_3_final", "timeoutPerCellInSeconds": 120, "dependencies": ["Second_DAG_NB_1"], "args": {"p1": p1, "p2": p2}, "retry": 1, "retryIntervalInSeconds": 10 }, # Finally, run NB_1 followed by NB_3 { "name": "NB_1_fourth", "path": "NB_1_final", "timeoutPerCellInSeconds": 120, "dependencies": ["Second_DAG_NB_3"], "args": {"p1": p1, "p2": p2} }, { "name": "Final_NB_3", "path": "NB_3_final", "timeoutPerCellInSeconds": 120, "dependencies": ["NB_1_fourth"], "args": {"p1": p1, "p2": p2}, "retry": 1, "retryIntervalInSeconds": 10 } ], "timeoutInSeconds": 43200, "concurrency": 3 } results_BIG = mssparkutils.notebook.runMultiple(BIG_DAG, {"displayDAGViaGraphviz": True}) print("\nExecution complete. Results:") for notebook, status in results_BIG.items(): print(f"Notebook {notebook}: {status}")
Editor is loading...
Leave a Comment