Untitled

 avatar
unknown
python
4 years ago
5.0 kB
6
Indexable
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 19 18:16:51 2021

Tresc zadania 2

1. Scena satelitarna - Sentinel dowolna, każdy swoja (dane o rozdzielczości 10m 5 kanałów), 
   rysujemy na podstawie kanału RGB
2. Zbudowanie zbioru testowego/treningowego wektor, etykiety: Woda Las zieleń pole zabudowa
3. Rastryzacja zbioru treningowego i jego zamiana na współrzędne (macierz rzadka)
4. Podział na zbiór testowy i treningowy
5. Wybór dwóch klasyfikatorów dowolnych i dobór (hiper)parametrów (Random Search)
6. Walidacja wyników na podstawie zbioru testowego
7. Wybór lepszego modelu i jego predykcja na całym zbiorze danych
8. Mapa w QGIS

@author: Moonshroom
"""

import rasterio as rio
import pandas as pd
import geopandas as gpd
import numpy as np

import os

# Raw string: the Windows path is full of backslashes that would otherwise be
# parsed as (invalid) escape sequences.
os.chdir(r"D:\Studia_magisterka\semestr_2\Eksploracja_danych_uczenie_maszynowe\zaliczenie_2\img")

# Build one raster that holds all bands.
# The readers are deliberately left open: later code in this script still
# reads georeferencing information from band3.
band2 = rio.open("B02.jp2")
band3 = rio.open("B03.jp2")
band4 = rio.open("B04.jp2")
band8 = rio.open("B08.jp2")

# Reuse the profile of band 2 for the output, switching it to a 4-channel
# GeoTIFF. band2 must stay a rasterio dataset here; wrapping it in a NumPy
# array would remove both .profile and .read().
band2_geo = band2.profile
band2_geo.update({"count": 4, "driver": "GTiff"})

with rio.open('data.tiff', 'w', **band2_geo) as dest:
    # Channel order is B02, B03, B04, B08, matching the column names that are
    # assigned to the sampled values further down.
    for channel, band in enumerate((band2, band3, band4, band8), start=1):
        dest.write(band.read(1), channel)

# Build the sample table: read the band values under every labelled point
# and attach them to the point layer as extra columns.
cl = ['b02','b03','b04','b08']
proby = gpd.read_file("proby.gpkg")
point_coords = zip(proby.geometry.x, proby.geometry.y)
with rio.open("data.tiff", "r") as dst:
    # dst.sample yields one array of band values per coordinate pair.
    pixel_values = np.array(list(dst.sample(point_coords)))
samples = pd.DataFrame(pixel_values, columns=cl, index=proby.index)
proby_s = proby.join(samples)



# Build the feature table and the label vector.
# Class names are translated to integer codes; "pola" and "pole" share code 4.
label_codes = {
    "woda": 1,
    "las": 2,
    "zielen": 3,
    "pola": 4,
    "pole": 4,
    "zabudowa": 5,
}
X = proby_s.drop(columns=["klasa", "geometry"])
y = proby_s["klasa"].map(label_codes)

# Hyperparameters and classifiers
from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Split first, so that the held-out part never influences the choice of
# hyperparameters.
X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.33, random_state=42)

parameters = {'n_neighbors': np.arange(5, 20, 2).tolist(),
              'algorithm': ['ball_tree', 'kd_tree', 'brute']}

# Both search dimensions are finite lists, so there are only
# len(n_neighbors) * len(algorithm) distinct candidates. Asking for more
# iterations than that only makes scikit-learn emit a warning.
n_candidates = len(parameters['n_neighbors']) * len(parameters['algorithm'])

best_hyperparams = RandomizedSearchCV(estimator=KNC(),
                                      param_distributions=parameters,
                                      scoring='accuracy',
                                      n_iter=n_candidates,
                                      random_state=42)
best_hyperparams.fit(X_train, y_train)
print('Najlepsze parametry', best_hyperparams.best_params_)

# Train k-NN with the parameters selected by the search instead of a
# hard-coded neighbour count.
model_knn = KNN(**best_hyperparams.best_params_)
model_knn.fit(X_train, y_train)
pred_train_knn = model_knn.predict(X_train)
pred_test_knn = model_knn.predict(X_test)

# Element-wise agreement between the true labels and the predictions.
por_train = y_train == pred_train_knn
por_test = y_test == pred_test_knn

# Accuracy on the training part and on the held-out part.
print(por_train.sum()/len(por_train), por_test.sum()/len(por_test))

from sklearn.neighbors import NearestCentroid as NC

# k-NN prediction for every labelled sample.
KNN_pred = model_knn.predict(X)

# Second classifier: nearest centroid with the Manhattan metric, fitted on
# the training part only.
model_nc = NC(metric='manhattan')
model_nc.fit(X_train, y_train)

pred_train_nc = model_nc.predict(X_train)
pred_test_nc = model_nc.predict(X_test)

# Element-wise agreement between the true labels and the predictions.
por_train, por_test = y_train == pred_train_nc, y_test == pred_test_nc

# Share of correctly classified samples in each part.
train_accuracy = por_train.sum() / len(por_train)
test_accuracy = por_test.sum() / len(por_test)
print(train_accuracy, test_accuracy)

# Validation
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

# The scaler is placed inside a pipeline so that it is re-fitted on the
# training portion of every fold. Fitting it once on the full data set before
# cross-validation would let the validation folds influence the scaling.
# cross_val_score works on clones, so model_knn and model_nc keep the state
# they were fitted with above.
scores_knn = cross_val_score(make_pipeline(StandardScaler(), model_knn), X, y, cv=5)
scores_nc = cross_val_score(make_pipeline(StandardScaler(), model_nc), X, y, cv=5)

# Report the per-fold scores and their mean so the two models can be compared.
print('CV accuracy KNN', scores_knn.mean(), scores_knn)
print('CV accuracy NC', scores_nc.mean(), scores_nc)
# Build the classified raster
from contextlib import ExitStack
from rasterio.windows import Window

# os.listdir gives no ordering guarantee; sorting makes the band order
# deterministic (B02, B03, B04, B08 sort in the same order as the training
# columns).
pliki = sorted(f for f in os.listdir(".") if f.endswith('.jp2'))
if len(pliki) != X.shape[1]:
    # Fail early with a clear message instead of deep inside predict().
    raise ValueError(
        f"Found {len(pliki)} .jp2 files but the model was trained on "
        f"{X.shape[1]} features: {pliki}"
    )

# band3 only provides the georeferencing and the size of the output raster.
baza = band3
win_size = 2000

# ExitStack closes every band reader and the output file, even on error.
with ExitStack() as stack:
    # All bands must be open before the first window is predicted; the model
    # needs every feature for each pixel.
    ptrs = [stack.enter_context(rio.open(plik)) for plik in pliki]
    wyniki = stack.enter_context(
        rio.open("knn_pred_sup.tif", "w", driver='GTiff', crs=baza.crs,
                 width=baza.width, height=baza.height,
                 transform=baza.transform, dtype='int8', count=1, nodata=0)
    )
    # Process the scene in win_size x win_size tiles to bound memory use;
    # tiles on the right and bottom edges are clipped to the raster extent.
    for ycell in range(0, baza.height, win_size):
        ysize = min(win_size, baza.height - ycell)
        for xcell in range(0, baza.width, win_size):
            xsize = min(win_size, baza.width - xcell)
            W = Window(xcell, ycell, xsize, ysize)
            # One row per pixel, one column per band.
            all_bands = np.array(
                [ptr.read(1, window=W).ravel() for ptr in ptrs]
            ).T
            results = model_knn.predict(all_bands)
            results = results.reshape(ysize, xsize).astype('int8')
            wyniki.write(results, window=W, indexes=1)
Editor is loading...