Untitled

mail@pastecode.io avatar
unknown
plain_text
2 months ago
3.3 kB
10
Indexable
Never
""" Check if script selects from sandbox(_analysts) schema """
import logging
import re

from scripts.validator.linters.dwh.common_dwh_linter_factory import common_dwh_linter_factory
from simple_ddl_parser import DDLParser


# Directory passed to common_check.set_path() by the runner statements at the
# end of this file.
# NOTE(review): hard-coded relative path; presumably resolved against the
# current working directory — confirm before running from another location.
path = "../../../../../dwh/dm/dm_detail_edu_skysmart_task_student_step"

# Allowed column-name patterns per base data type. Keys are the upper-cased
# type name with any "(length/precision)" suffix removed; a column passes when
# at least one pattern of its type matches (case-insensitively).
_COLUMN_NAME_RULES = {
    'BOOL': [r'^is_'],
    'TIME': [r'time$'],
    'UUID': [r'uuid$'],
    'VARCHAR': [r'_nm$', r'_txt$', r'_hash$', r'_list$', r'_code$', r'_id$', r'_rk$', r'_pk$', r'_fk$'],
    'BIGINT': [r'_sec$', r'_code$', r'_rate$', r'_interval$', r'_amt$', r'_dur$', r'_cnt$', r'_hash$', r'_id$', r'_rk$', r'_pk$', r'_fk$', r'_num$'],
    'NUMERIC': [r'_amt$', r'_num$', r'_cnt$', r'_pct$', r'_rate$'],
    'FLOAT2': [r'_amt$', r'_num$', r'_cnt$', r'_pct$', r'_rate$'],
    'FLOAT8': [r'_amt$', r'_num$', r'_cnt$', r'_pct$', r'_rate$'],
    'INT2': [r'_id$', r'_cnt$', r'_pk$', r'_fk$', r'_num$', r'_dur$', r'_code$', r'_amt$', r'_rate$', r'_pct$', r'_rk$'],
    'INT4': [r'_id$', r'_cnt$', r'_pk$', r'_fk$', r'_num$', r'_dur$', r'_code$', r'_amt$', r'_rate$', r'_pct$', r'_rk$'],
    'INT8': [r'_id$', r'_cnt$', r'_pk$', r'_fk$', r'_num$', r'_dur$', r'_code$', r'_amt$', r'_rate$', r'_pct$', r'_rk$'],
    'INTEGER': [r'_id$', r'_cnt$', r'_pk$', r'_fk$', r'_num$', r'_dur$', r'_code$', r'_amt$', r'_rate$', r'_pct$', r'_rk$'],
    'TIMESTAMP': [r'_dttm$', r'_dt$'],
    # The two keys below were misspelled ('DATA', 'JOSN') and therefore never
    # matched a real column type.
    'DATE': [r'_dttm$', r'_dt$', r'date$'],
    'JSON': [r'^[^iI].*'],
}


def sandbox_check(files: dict):
    """Validate that every column name matches the naming rules of its data type.

    Args:
        files: Mapping that must contain a ``"specification"`` entry. The
            specification is read as a dict with an optional ``'table-name'``
            and a ``'structure'`` list whose items carry ``'column-name'`` and
            ``'data-type'``.

    Returns:
        ``True`` when no column violates the rules (columns whose type has no
        rule are skipped), ``False`` when at least one violation was found.
        Every violation is reported through ``logging.error``.

    Raises:
        KeyError: if ``"specification"`` is missing, or a structure item lacks
            ``'column-name'`` or ``'data-type'``.
    """
    yml_pars = files["specification"]
    yml_table_name = yml_pars.get('table-name', "")
    logging.info(f"Наименование таблицы - {yml_table_name}")

    errors = []

    # Walk the column list itself; a missing/empty structure means nothing to check.
    for colum_index, column in enumerate(yml_pars.get('structure') or []):
        yml_column = column['column-name']
        # Drop the "(n)" / "(p,s)" suffix so e.g. "varchar(255)" maps to VARCHAR.
        yml_type = str(column['data-type']).upper().split('(')[0].strip()
        logging.info(f" {colum_index} Тип     - {yml_type}")
        logging.info(f" {colum_index} Колонка - {yml_column}")

        patterns = _COLUMN_NAME_RULES.get(yml_type)
        if not patterns:
            logging.info(f"Для типа:{yml_type} не найдены правила проверки наименования")
            continue

        if not any(re.search(pattern, yml_column, re.IGNORECASE) for pattern in patterns):
            # Collect the violation so it actually affects the return value.
            errors.append(f"Для типа:{yml_type} поле:{yml_column}, не соответсвует правилам наименования")

    for error in errors:
        logging.error(error)

    return not errors

# Build a linter object from the project factory with sandbox_check as its
# only registered check, point it at `path`, and start a local test run.
# NOTE(review): these statements execute whenever the module is imported;
# presumably the file is meant to be run as a script — confirm whether an
# `if __name__ == "__main__":` guard is wanted.
common_check = common_dwh_linter_factory([sandbox_check])
common_check.set_path(path)
common_check.local_test()
Leave a Comment