Untitled
unknown
plain_text
10 months ago
2.7 kB
16
Indexable
%%time
# Build one large notebook DAG and run it with mssparkutils.notebook.runMultiple.
#
# Stages, in the order they are declared below:
#   1. "Initial" DAG  - NB_1 and NB_2 have no dependencies; NB_3 waits for NB_1.
#   2. NB_1 twice more, chained one after the other.
#   3. "Second" DAG   - same shape as the initial one, gated on NB_1_third.
#   4. NB_1 once more, followed by a final NB_3.

# Parameters forwarded to every notebook through its "args" entry.
p1 = 10_000_000  # rows to table
p2 = 1337  # random seed for np

# Settings shared by the activities (values unchanged from the literal DAG).
CELL_TIMEOUT_SECONDS = 120
RETRY_COUNT = 1
RETRY_INTERVAL_SECONDS = 10


def _activity(name, path, dependencies=None, retry=False):
    """Return one activity entry for the runMultiple DAG.

    Args:
        name: Activity name; other activities refer to it in "dependencies".
        path: Notebook path to run.
        dependencies: Optional iterable of activity names that must come
            first. The "dependencies" key is omitted when this is empty.
        retry: When true, add the "retry" / "retryIntervalInSeconds" keys.

    Returns:
        A new dict; each call also builds its own fresh "args" dict.
    """
    activity = {
        "name": name,
        "path": path,
        "timeoutPerCellInSeconds": CELL_TIMEOUT_SECONDS,
    }
    if dependencies:
        activity["dependencies"] = list(dependencies)
    activity["args"] = {"p1": p1, "p2": p2}
    if retry:
        activity["retry"] = RETRY_COUNT
        activity["retryIntervalInSeconds"] = RETRY_INTERVAL_SECONDS
    return activity


BIG_DAG = {
    "activities": [
        # Initial DAG: NB_1 and NB_2 in parallel, then NB_3 after NB_1
        _activity("Initial_NB_1", "NB_1_final"),
        _activity("Initial_NB_2", "NB_2_final", retry=True),
        _activity("Initial_NB_3", "NB_3_final", ["Initial_NB_1"], retry=True),
        # Then run NB_1 twice
        # NOTE(review): NB_1_second waits only for Initial_NB_3, so nothing
        # here waits for Initial_NB_2 - confirm that is intended.
        _activity("NB_1_second", "NB_1_final", ["Initial_NB_3"]),
        _activity("NB_1_third", "NB_1_final", ["NB_1_second"]),
        # Run the initial DAG again as a sub-process:
        _activity("Second_DAG_NB_1", "NB_1_final", ["NB_1_third"]),
        _activity("Second_DAG_NB_2", "NB_2_final", ["NB_1_third"], retry=True),
        _activity(
            "Second_DAG_NB_3", "NB_3_final", ["Second_DAG_NB_1"], retry=True
        ),
        # Finally, run NB_1 followed by NB_3
        # NOTE(review): as above, no activity depends on Second_DAG_NB_2 -
        # confirm the final stage does not need to wait for it.
        _activity("NB_1_fourth", "NB_1_final", ["Second_DAG_NB_3"]),
        _activity("Final_NB_3", "NB_3_final", ["NB_1_fourth"], retry=True),
    ],
    "timeoutInSeconds": 43200,  # 12 hours for the whole DAG
    "concurrency": 3,
}

# The second argument carries run options; "displayDAGViaGraphviz" is
# presumably what renders the DAG graph in the cell output - see the
# runMultiple documentation.
results_BIG = mssparkutils.notebook.runMultiple(
    BIG_DAG, {"displayDAGViaGraphviz": True}
)

print("\nExecution complete. Results:")
# The result is iterated as a mapping; each value is printed as returned.
for notebook, status in results_BIG.items():
    print(f"Notebook {notebook}: {status}")
Leave a Comment