Untitled

mail@pastecode.io avatar
unknown
plain_text
5 months ago
1.5 kB
2
Indexable
import sys

def parse_line(line):
    """
    Parse one whitespace-delimited record and return its cleaned fields.

    The line is split on runs of whitespace. Column 0 is used as the station
    id, column 2 as the date/time, and columns 3, 4 and 7 are converted to
    float as temperature, humidity and pressure respectively.

    Returns:
        ``[station_id, date_time, temperature, humidity, pressure]`` for a
        valid record, where ``pressure`` is ``None`` if the record carries the
        999.9 missing-value sentinel.
        ``None`` if the line has fewer than 12 columns, a numeric column does
        not convert to float, temperature is not within [-100, 100], or
        humidity is not within [0, 100]. NaN counts as out of range.
    """
    fields = line.strip().split()

    if len(fields) < 12:  # Adjust based on the expected number of fields
        return None  # Skip malformed lines

    # Only the conversions can raise, so keep the try body limited to them.
    try:
        temperature = float(fields[3])
        humidity = float(fields[4])
        pressure = float(fields[7])
    except ValueError:
        return None  # Skip lines with conversion errors

    # float() accepts "nan", and every ordering comparison with NaN is false.
    # Testing "is inside the range" (rather than "is outside") therefore
    # rejects NaN as well as ordinary out-of-range values.
    if not -100 <= temperature <= 100:
        return None
    if not 0 <= humidity <= 100:
        return None

    # 999.9 marks a missing pressure reading; report it as None.
    if pressure == 999.9:
        pressure = None

    return [fields[0], fields[2], temperature, humidity, pressure]

def main():
    """Read records from stdin and print each valid one as a tab-separated row."""
    for raw in sys.stdin:
        record = parse_line(raw)
        if not record:
            # Rejected by parse_line; emit nothing for this line.
            continue
        sid, stamp, temp, hum, pres = record
        print(f"{sid}\t{stamp}\t{temp}\t{hum}\t{pres}")

# Run the stdin-to-stdout filter only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
Leave a Comment