# Paste-site metadata (not part of the script):
# Untitled | unknown | python | 5 years ago | 5.0 kB | 10 | Indexable
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 19 18:16:51 2021

Assignment 2
1. Satellite scene - any Sentinel scene, each student their own (10 m resolution data, 5 bands);
   we draw on the basis of the RGB channels.
2. Build the test/training set as a vector layer, labels: water, forest, greenery, field, built-up.
3. Rasterise the training set and convert it to coordinates (sparse matrix).
4. Split into a test set and a training set.
5. Choose any two classifiers and tune their (hyper)parameters (Random Search).
6. Validate the results on the test set.
7. Choose the better model and run its prediction on the whole data set.
8. Map in QGIS.

@author: Moonshroom
"""
import rasterio as rio
import pandas as pd
import geopandas as gpd
import numpy as np
import os
# Work inside the folder that holds the band files.
# Raw string: the Windows path is full of backslashes that must not be parsed as escapes.
os.chdir(r"D:\Studia_magisterka\semestr_2\Eksploracja_danych_uczenie_maszynowe\zaliczenie_2\img")

# Build one raster that contains all bands.
# The band datasets stay open on purpose: band3 is reused further down as the
# reference grid (size, CRS, transform) for the prediction raster.
band2 = rio.open("B02.jp2")
band3 = rio.open("B03.jp2")
band4 = rio.open("B04.jp2")
band8 = rio.open("B08.jp2")

# Reuse the B02 profile for the output, switched to a 4-band GeoTIFF.
# (band2 must remain a rasterio dataset here; wrapping it in np.array would
# drop both .profile and .read.)
band2_geo = band2.profile
band2_geo.update({"count": 4, "driver": 'GTiff'})

with rio.open('data.tiff', 'w+', **band2_geo) as dest:
    # Output band order: 1 = B02, 2 = B03, 3 = B04, 4 = B08.
    for band_index, band in enumerate((band2, band3, band4, band8), start=1):
        dest.write(band.read(1), band_index)
# Building the sample: read the raster values under every sample point.
cl = ['b02', 'b03', 'b04', 'b08']
proby = gpd.read_file("proby.gpkg")
point_coords = zip(proby.geometry.x, proby.geometry.y)
with rio.open("data.tiff", "r") as dst:
    # dst.sample is lazy, so it has to be consumed while the dataset is open.
    samples = np.array(list(dst.sample(point_coords)))
# One column per band, aligned with the points through their index.
samples = pd.DataFrame(samples, columns=cl, index=proby.index)
proby_s = proby.join(samples)

# Features and labels for the test/training sets.
class_codes = {"woda": 1, "las": 2, "zielen": 3, "pola": 4, "pole": 4, "zabudowa": 5}
X = proby_s.drop(columns=["klasa", "geometry"])
y = proby_s["klasa"].map(class_codes)
# Hyperparameters and classifiers
from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler  # not used in this section; kept so the name stays available

KNN = KNC  # second alias of the same class, kept so both names remain usable

# Hold out the test set BEFORE tuning so the search never sees test samples.
X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.33, random_state=42)

parameters = {'n_neighbors': np.arange(5, 20, 2).tolist(),
              'algorithm': ['ball_tree', 'kd_tree', 'brute']}
# Both value lists are finite, so this is the size of the whole search space;
# asking for more iterations than that only produces a warning.
n_candidates = len(parameters['n_neighbors']) * len(parameters['algorithm'])
best_hyperparams = RandomizedSearchCV(estimator=KNC(),
                                      param_distributions=parameters,
                                      scoring='accuracy',
                                      n_iter=n_candidates)
best_hyperparams.fit(X_train, y_train)
print('Najlepsze parametry', best_hyperparams.best_params_)

# Use the model selected by the search (already refit on the training set)
# instead of a hard-coded number of neighbours.
model_knn = best_hyperparams.best_estimator_

# Share of correctly classified samples on the training and the test set.
pred_train_knn = model_knn.predict(X_train)
pred_test_knn = model_knn.predict(X_test)
por_train = y_train == pred_train_knn
por_test = y_test == pred_test_knn
print(por_train.mean(), por_test.mean())

KNN_pred = model_knn.predict(X)
from sklearn.neighbors import NearestCentroid as NC

# Second classifier: nearest centroid with the Manhattan metric,
# fitted on the same training split.
model_nc = NC(metric='manhattan')
model_nc.fit(X_train, y_train)

pred_train_nc, pred_test_nc = (model_nc.predict(subset) for subset in (X_train, X_test))

# Boolean hit/miss vectors; their sum over their length is the accuracy.
por_train = (y_train == pred_train_nc)
por_test = (y_test == pred_test_nc)
print(por_train.sum() / len(por_train),
      por_test.sum() / len(por_test))
# Validation
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of both classifiers on the unscaled features X,
# i.e. the same kind of input the models are fitted on and later predict from.
scores_knn = cross_val_score(model_knn, X, y, cv=5)
scores_nc = cross_val_score(model_nc, X, y, cv=5)

# Report the results; without this the scores are computed and silently dropped.
print('KNN cross-validation:', scores_knn.mean(), scores_knn)
print('NC cross-validation:', scores_nc.mean(), scores_nc)
# Creating the classified raster
from contextlib import ExitStack

from rasterio.windows import Window

# os.listdir gives no ordering guarantee, so sort the band files to get a
# deterministic feature order. B02, B03, B04, B08 sort into the order of the
# training columns -- assumes the folder holds only those four .jp2 files.
pliki = sorted(f for f in os.listdir(".") if f.endswith('.jp2'))
baza = band3  # reference grid: size, CRS and transform of the output
win_size = 2000  # edge length (in pixels) of the square processing window

with ExitStack() as stack:
    # Every input band and the output raster are closed when the block exits.
    ptrs = [stack.enter_context(rio.open(plik)) for plik in pliki]
    wyniki = stack.enter_context(
        rio.open("knn_pred_sup.tif", "w+", driver='GTiff', crs=baza.crs,
                 width=baza.width, height=baza.height,
                 transform=baza.transform, dtype='int8', count=1, nodata=0))

    # Classify the scene window by window to keep memory use bounded.
    for ycell in range(0, baza.height, win_size):
        ysize = min(win_size, baza.height - ycell)  # last row of windows may be shorter
        for xcell in range(0, baza.width, win_size):
            xsize = min(win_size, baza.width - xcell)  # last column may be narrower
            W = Window(xcell, ycell, xsize, ysize)

            # One row per pixel, one column per band.
            all_bands = np.array(
                [ptr.read(1, window=W).flatten() for ptr in ptrs]).T

            # Predict per pixel and fold the flat result back into the window shape.
            results = model_knn.predict(all_bands)
            results = results.reshape(ysize, xsize).astype('int8')
            wyniki.write(results, window=W, indexes=1)