Untitled

 avatar
unknown
plain_text
19 days ago
2.7 kB
3
Indexable
%%time

# Parameters placed in the "args" of every activity below.
p1 = 10_000_000  # rows to table
p2 = 1337  # random seed for np

# Settings shared by the activities; kept in one place so they cannot drift.
_CELL_TIMEOUT_S = 120
_RETRY_POLICY = {"retry": 1, "retryIntervalInSeconds": 10}


def _activity(name, path, dependencies=None, retry=False):
    """Build one entry for the "activities" list of the DAG.

    Args:
        name: Unique activity name; other activities refer to it in their
            "dependencies".
        path: Value stored under "path" (the notebook to run).
        dependencies: Optional iterable of activity names this one waits on.
            When omitted, the entry has no "dependencies" key at all.
        retry: When true, the entry also carries the shared retry policy
            ("retry": 1, "retryIntervalInSeconds": 10).

    Returns:
        A fresh dict; the "args" and "dependencies" values are new objects
        on every call, so entries never share mutable state.
    """
    entry = {
        "name": name,
        "path": path,
        "timeoutPerCellInSeconds": _CELL_TIMEOUT_S,
    }
    if dependencies is not None:
        entry["dependencies"] = list(dependencies)
    entry["args"] = {"p1": p1, "p2": p2}
    if retry:
        entry.update(_RETRY_POLICY)
    return entry


BIG_DAG = {
    "activities": [
        # Initial DAG: NB_1 and NB_2 in parallel, then NB_3 after NB_1.
        _activity("Initial_NB_1", "NB_1_final"),
        _activity("Initial_NB_2", "NB_2_final", retry=True),
        _activity("Initial_NB_3", "NB_3_final", ["Initial_NB_1"], retry=True),
        # Then NB_1 twice, one after the other.
        # NOTE(review): NB_1_second waits only on Initial_NB_3, not on
        # Initial_NB_2 - confirm that overlap is intended.
        _activity("NB_1_second", "NB_1_final", ["Initial_NB_3"]),
        _activity("NB_1_third", "NB_1_final", ["NB_1_second"]),
        # The initial DAG shape again, gated on NB_1_third.
        _activity("Second_DAG_NB_1", "NB_1_final", ["NB_1_third"]),
        _activity("Second_DAG_NB_2", "NB_2_final", ["NB_1_third"], retry=True),
        _activity(
            "Second_DAG_NB_3", "NB_3_final", ["Second_DAG_NB_1"], retry=True
        ),
        # Finally NB_1 followed by NB_3.
        # NOTE(review): NB_1_fourth waits only on Second_DAG_NB_3, not on
        # Second_DAG_NB_2 - confirm that overlap is intended.
        _activity("NB_1_fourth", "NB_1_final", ["Second_DAG_NB_3"]),
        _activity("Final_NB_3", "NB_3_final", ["NB_1_fourth"], retry=True),
    ],
    "timeoutInSeconds": 43200,
    "concurrency": 3,
}

# Submit the whole DAG in one call. The options dict sets the
# "displayDAGViaGraphviz" flag (presumably renders the DAG as a Graphviz
# diagram - confirm against the runMultiple documentation).
_run_options = {"displayDAGViaGraphviz": True}
results_BIG = mssparkutils.notebook.runMultiple(BIG_DAG, _run_options)

# Print every (key, value) pair of the returned mapping, one per line.
print("\nExecution complete. Results:")
for entry in results_BIG.items():
    notebook, status = entry
    print(f"Notebook {notebook}: {status}")
Editor is loading...
Leave a Comment