# Paste-site metadata captured with this config (not part of the YAML data):
# Untitled | unknown | plain_text | 2 years ago | 2.4 kB | 10 | Indexable
---
# Job header: top-level scalar settings.
# Every value here is a string; the quotes only make that explicit so that
# version-like values can never be re-typed by a parser.
version: 'v2.1'
job_type: 'spark3.1.1'
cluster: 'hdp-z502-prod'
conda_pack: 'data_science/matcher/ml-products/prediction-pipeline:v7.0.6'
hdfs_warehouse_path: 'hdfs:///warehouse/matcher/'
# Spark settings for the job.
# NOTE(review): the nesting below is reconstructed. The original lines had lost
# all indentation, which left `spark_config` empty and put every key beneath it
# at the top level. Confirm the placement of `files` and `context` against the
# job schema.
spark_config:
  # Model files on HDFS, keyed by model name.
  files:
    electronics:
      path: hdfs:///warehouse/ml_products_team/ds/files/models/electronics_v2_20230417_ce.cbm
    # Disabled alternatives, kept for reference:
    # electronics_v2:
    #   path: hdfs:///warehouse/ml_products_team/ds/files/models/electronics_v3_20230706_ce_pretrained.cbm
    # all_v3:
    #   path: hdfs:///warehouse/ml_products_team/ds/files/models/all_v3_20230831.cbm
  # Spark properties, written with their full dotted names.
  context:
    spark.task.cpus: 2
    spark.yarn.queue: ml_products
    spark.driver.cores: 4
    spark.driver.memory: 8g
    spark.executor.cores: 8
    spark.executor.memory: 8g
    spark.memory.fraction: 0.6
    # Fixed-size cluster: 80 executors, dynamic allocation switched off below.
    spark.executor.instances: 80
    spark.driver.maxResultSize: 4g
    spark.driver.memoryOverhead: 2g
    spark.executor.memoryOverhead: 6g
    spark.shuffle.service.enabled: true
    spark.dynamicAllocation.enabled: false
    # -1 disables automatic broadcast joins.
    spark.sql.autoBroadcastJoinThreshold: -1
    spark.sql.execution.arrow.pyspark.enabled: true
    spark.sql.sources.partitionOverwriteMode: dynamic
# Dotted name of the job entry point.
# NOTE(review): presumably a Python import path resolved by the job runner;
# confirm.
callable: 'matcher_prediction_pipeline.multi_inference.main'
# Input tables, keyed by a logical name (`candidates`, `prod_table`).
# The nesting is restored here: with the indentation stripped, `type` and
# `table` appeared twice as sibling keys and `input_data` itself was empty.
input_data:
  candidates:
    type: hive
    # Previous source, kept for reference:
    # table: matcher.ozon_ozon_candidates_features
    table: ml_products.all_categories_v3_20230814_train_dataset_finally
  prod_table:
    type: hive
    table: matcher.ozon_data_etl_v2_prod
    # Alternative source, kept for reference:
    # table: ml_products.all_categories_v3_20230814_train_dataset_finally
# Output tables, keyed by a logical name.
# The nesting is restored here: the original lines had lost all indentation,
# leaving `output_data` empty and its settings at the top level.
output_data:
  prediction:
    type: hive
    table: mzakhvataev_db.ozon_ozon_predictions_electronics_v2
    insertInto:
      overwrite: true
    # NOTE(review): `on_missing` and `partitionBy` are placed beside
    # `insertInto` rather than inside it. The flattened source does not show
    # which level they belong to; confirm against the job schema.
    on_missing: create
    partitionBy: date
# Extra settings for the job.
# NOTE(review): the nesting below is reconstructed. With the indentation
# stripped, `key`, `input_data` and `candidates_key` each appeared twice as
# sibling keys, and `input_data: prod_table` collided with the top-level
# `input_data` section. Confirm that `models_config`, `extra_columns` and
# `data` belong under `extra_kwargs`.
extra_kwargs:
  tasks_per_worker: 12
  tmp_output_table: mzakhvataev_db.ozon_ozon_predictions_tmp_common
  prod_table_key: variantid
  # Model name -> list of category ids, quoted so they stay strings.
  models_config:
    electronics:
      - "15621042"  # Electronics
    # Disabled alternatives, kept for reference:
    # electronics_v2:
    #   - "15621042"  # Electronics
    # all_v3:
    #   - "15621042"  # Electronics
  # Each entry is a two-item pair.
  # NOTE(review): presumably [column name, column type]; confirm.
  extra_columns:
    - [source, string]
  # Two lookups against the `prod_table` input, one per candidate key column.
  data:
    first:
      key: variantid
      input_data: prod_table
      candidates_key: variantid1
    second:
      key: variantid
      input_data: prod_table
      candidates_key: variantid2
# Submission parameters. All values are deliberately quoted strings, so
# 'false', '60' and the timestamp are not re-typed as boolean, integer or
# datetime by the parser.
# The nesting is restored, and the stray "Editor is loading..." text that a
# web scrape had fused after the closing quote of `execution_date` is removed;
# it was not part of the value and made the line invalid YAML.
_submit_params:
  explain: 'false'
  timeout: '60'
  execution_date: '2023-09-23T01:00:00+00:00'