Untitled
unknown
plain_text
a year ago
1.5 kB
10
Indexable
import sys
def parse_line(line):
    """Parse one whitespace-separated dataset line into cleaned fields.

    The line is split on whitespace. Fields 0, 2, 3, 4 and 7 are read as the
    station id, date/time, temperature, humidity and pressure respectively.

    Args:
        line: One raw text line of the dataset.

    Returns:
        ``[station_id, date_time, temperature, humidity, pressure]`` where the
        first two entries are strings, temperature and humidity are floats,
        and pressure is a float, or ``None`` when it equals the ``999.9``
        missing-value marker.

        ``None`` is returned instead of a list when the line has fewer than
        12 fields, when a numeric field cannot be converted to ``float``, or
        when temperature is not within [-100, 100] or humidity is not within
        [0, 100]. NaN values count as out of range.
    """
    fields = line.strip().split()
    if len(fields) < 12:  # Adjust based on the expected number of fields
        return None  # Skip malformed lines

    station_id = fields[0]
    date_time = fields[2]

    # Only the float conversions can raise ValueError, so keep the try small.
    try:
        temperature = float(fields[3])
        humidity = float(fields[4])
        pressure = float(fields[7])
    except ValueError:
        return None  # Skip lines with conversion errors

    # float() accepts the token "nan", and every ordering comparison with NaN
    # is False. Testing "is inside the range" (rather than "is below or
    # above") makes NaN fail validation instead of slipping through.
    if not (-100 <= temperature <= 100):
        return None
    if not (0 <= humidity <= 100):
        return None

    # '999.9' marks a missing pressure reading; report it as None.
    if pressure == 999.9:
        pressure = None

    return [station_id, date_time, temperature, humidity, pressure]
def main():
    """Read dataset lines from standard input and print the cleaned records.

    Each input line is passed to ``parse_line``. Lines for which it returns
    a falsy result are skipped; every other record is written to standard
    output as one tab-separated line.
    """
    for raw_line in sys.stdin:
        record = parse_line(raw_line)
        if not record:
            # Malformed or out-of-range line: nothing to emit.
            continue

        station_id, date_time, temperature, humidity, pressure = record
        print(f"{station_id}\t{date_time}\t{temperature}\t{humidity}\t{pressure}")
# Run the stdin-to-stdout filter only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
Editor is loading...
Leave a Comment