Untitled
unknown
plain_text
a year ago
2.4 kB
2
Indexable
Never
---
# Job definition that runs `matcher_prediction_pipeline.multi_inference.main`
# as a spark3.1.1 job on the hdp-z502-prod cluster.
#
# NOTE(review): this document was recovered from a copy in which all line
# breaks and indentation had been lost. Keys and values are unchanged, but the
# nesting below was reconstructed from key meaning -- confirm it against the
# job runner's schema before relying on it.
version: v2.1
job_type: spark3.1.1
cluster: hdp-z502-prod
conda_pack: data_science/matcher/ml-products/prediction-pipeline:v7.0.6
hdfs_warehouse_path: hdfs:///warehouse/matcher/

spark_config:
  # Model files referenced by the job, keyed by model name. The active key
  # (`electronics`) matches the key used in `extra_kwargs.models_config`.
  files:
    electronics:
      path: hdfs:///warehouse/ml_products_team/ds/files/models/electronics_v2_20230417_ce.cbm
    # Alternative models, currently disabled:
    # electronics_v2:
    #   path: hdfs:///warehouse/ml_products_team/ds/files/models/electronics_v3_20230706_ce_pretrained.cbm
    # all_v3:
    #   path: hdfs:///warehouse/ml_products_team/ds/files/models/all_v3_20230831.cbm
  context:
    # 2 cores per task with 8 cores per executor -> 4 concurrent tasks per
    # executor.
    spark.task.cpus: 2
    spark.yarn.queue: ml_products
    spark.driver.cores: 4
    spark.driver.memory: 8g
    spark.executor.cores: 8
    spark.executor.memory: 8g
    spark.memory.fraction: 0.6
    # Fixed executor count; dynamic allocation is switched off below.
    spark.executor.instances: 80
    spark.driver.maxResultSize: 4g
    spark.driver.memoryOverhead: 2g
    spark.executor.memoryOverhead: 6g
    spark.shuffle.service.enabled: true
    spark.dynamicAllocation.enabled: false
    # -1 disables automatic broadcast joins.
    spark.sql.autoBroadcastJoinThreshold: -1
    spark.sql.execution.arrow.pyspark.enabled: true
    # With `dynamic`, an overwrite replaces only the partitions being written.
    spark.sql.sources.partitionOverwriteMode: dynamic

callable: matcher_prediction_pipeline.multi_inference.main

input_data:
  candidates:
    type: hive
    # table: matcher.ozon_ozon_candidates_features
    table: ml_products.all_categories_v3_20230814_train_dataset_finally
  prod_table:
    type: hive
    table: matcher.ozon_data_etl_v2_prod
    # table: ml_products.all_categories_v3_20230814_train_dataset_finally

output_data:
  prediction:
    type: hive
    table: mzakhvataev_db.ozon_ozon_predictions_electronics_v2
    # NOTE(review): `overwrite`, `on_missing` and `partitionBy` are assumed to
    # be options of `insertInto` -- confirm the nesting.
    insertInto:
      overwrite: true
      on_missing: create
      partitionBy: date

# NOTE(review): everything from `tasks_per_worker` through `data` is assumed to
# belong to `extra_kwargs` (presumably keyword arguments for `callable`) --
# confirm.
extra_kwargs:
  tasks_per_worker: 12
  tmp_output_table: mzakhvataev_db.ozon_ozon_predictions_tmp_common
  prod_table_key: variantid
  # Model name -> list of ids, quoted so they stay strings. The inline comment
  # labels the id as Electronics.
  models_config:
    electronics:
      - "15621042"  # Electronics
    # electronics_v2:
    #   - "15621042"  # Electronics
    # all_v3:
    #   - "15621042"  # Electronics
  # Each entry is a two-element list; presumably [column name, column type].
  extra_columns:
    - [source, string]
  # `first` and `second` both read `prod_table` by `variantid`; they differ only
  # in the candidates-side key (`variantid1` vs `variantid2`).
  data:
    first:
      key: variantid
      input_data: prod_table
      candidates_key: variantid1
    second:
      key: variantid
      input_data: prod_table
      candidates_key: variantid2

# Submission parameters; every value is deliberately a quoted string.
_submit_params:
  explain: 'false'
  timeout: '60'
  execution_date: '2023-09-23T01:00:00+00:00'