Untitled

mail@pastecode.io avatar
unknown
plain_text
a month ago
2.4 kB
5
Indexable
Never
def parse_violations(conn):
    # Function to split violations and extract information
    def split_violations(x):
        if not x:
            return []
        violations = []
        current_violation = ""
        for part in x.split('|'):
            if re.match(r'^\s*\d+\.', part):
                if current_violation:
                    violations.append(current_violation.strip())
                current_violation = part
            else:
                current_violation += " | " + part
        if current_violation:
            violations.append(current_violation.strip())
        return violations

    conn.create_function('split_violations', split_violations, ['VARCHAR'], 'VARCHAR[]')
    
    # Updated function to extract violation ID
    conn.create_function('extract_violation_id', lambda x: int(re.match(r'^(\d+)\.', x).group(1)) if x and re.match(r'^(\d+)\.', x) else None, ['VARCHAR'], 'INTEGER')
    
    conn.create_function('extract_description', lambda x: re.sub(r'^\d+\.\s*', '', x.split(' - Comments:')[0]).strip() if x else '', ['VARCHAR'], 'VARCHAR')
    conn.create_function('extract_comments', lambda x: x.split(' - Comments:')[1].strip() if x and ' - Comments:' in x else None, ['VARCHAR'], 'VARCHAR')    

    conn.execute("""
    INSERT INTO VIOLATIONS (violation_id, violation_description)
    SELECT DISTINCT
        extract_violation_id(violation) AS violation_id,
        extract_description(violation) AS violation_description
    FROM (
        SELECT unnest(split_violations(violations)) AS violation
        FROM INSPECTIONS
        WHERE violations IS NOT NULL
    ) subquery
    WHERE violation_id IS NOT NULL
    ON CONFLICT (violation_id) DO UPDATE SET
    violation_description = EXCLUDED.violation_description
    WHERE VIOLATIONS.violation_description != EXCLUDED.violation_description
    """)
    
     # Populate INSPECTION_VIOLATIONS table
    conn.execute("""
    INSERT INTO INSPECTION_VIOLATIONS (inspection_id, violation_id, comments)
    SELECT DISTINCT
        inspection_id,
        extract_violation_id(violation) AS violation_id,
        extract_comments(violation) AS comments
    FROM (
        SELECT unnest(split_violations(violations)) AS violation,
        inspection_id
        FROM INSPECTIONS
        WHERE violations IS NOT NULL
    )
    ON CONFLICT (inspection_id, violation_id, comments) DO NOTHING
    """)
Leave a Comment