import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import os
import sys
import numpy as np
import time
import yaml
import json
import cv2
"""
Author: Jenö Faist, Paul Judis
Refernces: LFI-3 cnn.py
"""
"""
Template File to Train a CNN Network
"""
"""
This file trains the convolutional neural network with the selected dataset and hyperparameters.
The model is saved in the TRAIN_MODELS folder and can be further trained from there or moved to
COMPLETE_MODELS to save it.
"""
if __name__ == '__main__':
"""
You Define all Data Paths here commonly you save the Model and Hyperparmeters in TRAIN_MODELS
"""
absolutepath = os.path.dirname(__file__)
    training_set_PATH = absolutepath+'/DATASETS/early_3D_Kegel_SET/train'
hyperparameters_PATH = absolutepath+'/TRAIN_MODELS/hyperparameters.yaml'
"""
All Save PATHS
"""
model_save_PATH = absolutepath+'/TRAIN_MODELS/3D_DEC_MODEL_.pt'
training_history_PATH = absolutepath+'/TRAIN_MODELS/3D_DEC_MODEL_TRAIN_HISTORY_.json'
training_save_PATH = absolutepath+'/TRAIN_MODELS/3D_DEC_MODEL_TRAIN_SAVE_.json'
"""
This are the References Printed in the Consol to insure that CUDA uses your GPU
and the DATA Paths are set Correctly
"""
print("STARTING CNN TRAINING")
print("---------------------------")
print("Cuda Version: " + torch.version.cuda)
print("Cuda: "+str(torch.cuda.is_available()))
print("GPU: "+str(torch.cuda.get_device_name()))
print("Current Folderpath: "+absolutepath)
print("Model Saving in: "+model_save_PATH)
print("Training Save in: "+training_save_PATH)
print("Hyperparamtes in: "+hyperparameters_PATH)
print("Training History Save in: "+training_history_PATH)
print("---------------------------")
"""
Setting up Pytorch and setting the seed so that results can be recreated
"""
torch.manual_seed(0)
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
"""
Loading the Hyperparamters if Possible if not use Default Hyperparamters
"""
print("Loading Hyperparamters") # CONSOLE
data = {
'default':
{'batch_size': 64,
'num_epochs': 10,
'learning_rate': 0.001}
}
    try:
        with open(hyperparameters_PATH, "r") as stream:
            data = yaml.safe_load(stream)
        print("[!!!] Hyperparameters Loaded Successfully ! [!!!]")
    except (OSError, yaml.YAMLError):
        print("[?!?] Hyperparameters couldn't be loaded ! [?!?]")
        print("[!!!] Using Default Hyperparameters ! [!!!]")
# Set hyperparameters
num_epochs = data['default']['num_epochs']
batch_size = data['default']['batch_size']
learning_rate = data['default']['learning_rate']
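    # For reference, a hyperparameters.yaml matching the keys read above could
    # look like this (illustrative values mirroring the defaults, not
    # necessarily the ones used for the actual experiments):
    #
    # default:
    #   batch_size: 64
    #   num_epochs: 10
    #   learning_rate: 0.001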
print("---------------------------") # CONSOLE
"""
SETTING UP TRAINING DATA SET
Initialize transformations for data augmentation.
"""
transform = transforms.Compose(
[
transforms.ToTensor(),
transforms.Resize((256,256)),
]
)
# Load the Dataset with the transformations
    # Load the dataset with the transformations
    train_dataset = torchvision.datasets.ImageFolder(
        root=training_set_PATH,
        transform=transform
    )
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=8)
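    # ImageFolder expects one subfolder per class below the root and assigns
    # labels in alphabetical folder order, e.g. (folder names hypothetical):
    #
    #   DATASETS/early_3D_Kegel_SET/train/class_a/img_001.png
    #   DATASETS/early_3D_Kegel_SET/train/class_b/img_002.png
    #
    # The two-class output layer defined below matches such a two-folder layout.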
"""
In this Path you Define the Neural Network Model that you want be using.
For this File: its default resnet18
"""
    model = torchvision.models.resnet18(weights='DEFAULT') # base model with pretrained weights
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 2) # replace the final layer with a two-class head
"""
This Loades the not fulled trained Model if training was canceled,
so that the Training of Model can be continued.
"""
    # Last training epoch and batch
    last_eb = [0,0]
    # Lists to save the loss and accuracy over time for later plots
    training_history = [[],[]]
    try:
        # map_location lets a checkpoint saved on GPU be resumed on CPU as well;
        # note: no model.eval() here, since the model is about to be trained further
        model.load_state_dict(torch.load(model_save_PATH, map_location=device), strict=False)
        with open(training_save_PATH, 'r') as f:
            last_eb = json.load(f)
        with open(training_history_PATH, 'r') as f:
            training_history = json.load(f)
        print("[!!!] Using previously trained model, starting from Epoch: "+str(last_eb[0])+" Batch: "+str(last_eb[1])+" [!!!]")
    except (OSError, json.JSONDecodeError, RuntimeError):
        print("[!!!] No last training model found, starting new training with base model [!!!]")
    ### MODEL TORCH MODIFIERS ###
    # Multi-GPU training is currently disabled because it caused errors (TODO).
    # Note that wrapping the model in DataParallel prefixes all state_dict keys
    # with 'module.', which breaks the plain load_state_dict() resume logic above.
    #model = torch.nn.DataParallel(model)
    # Set the model to run on the device
    model = model.to(device)
    model.train()  # ensure training mode (affects batch norm and dropout)
"""
Here you are defining wich error function and optimizer you want be using
"""
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
### CONSOLE PRINTS ####
print("---------------------")
print("Beginn Training")
print("Starting with " + "Epoch:"+str(last_eb[0])+" Batch:"+str(last_eb[1]))
print("---------------------")
#######################
# Time Variables for tracking
start_time = time.time()
current_time = time.time()
last_time = 0
"""
In here the Model gets Trainied using the specifications above.
The trainings loop is in a try so if a KeyboardInterrupt happens argo the Programm gets closed
the Model gets automaticly saved.
"""
try:
for epoch in range(last_eb[0],num_epochs):
"""
If we train a Model that gets continued in Training then this part insures that
the last trained batches get skipped.
"""
list_dt = []
count = last_eb[1]
train_loader_iter = iter(train_loader)
data_load_time_start = time.time()
data_load_last_time = 0
list_data_time = []
            for n in range(last_eb[1]):
                next(train_loader_iter)
                data_load_time = time.time() - data_load_time_start - data_load_last_time
                list_data_time.append(data_load_time)
                avg_dt = sum(list_data_time)/len(list_data_time)
                sys.stdout.write("\033[K")
                est_time = ((last_eb[1] - n)*avg_dt)/60**2
                print('Skipping last trained batches | Estimated time left: ' + '%.2f h ' % est_time + '| Batches to skip: '+str(last_eb[1] - n), end='\r')
                data_load_last_time = time.time() - data_load_time_start
sys.stdout.write("\033[K")
print("Training Epoch: "+str(epoch)) # Console
            # Iterate over the (partially consumed) iterator so that the
            # batches skipped above are actually skipped.
            for inputs, labels in train_loader_iter:
"""
Train the Model with this Batch
"""
# Move input and label tensors to the device
inputs = inputs.to(device)
labels = labels.to(device)
# Zero out the optimizer
optimizer.zero_grad()
# Forward pass
outputs = model(inputs)
loss = criterion(outputs, labels)
# Backward pass
loss.backward()
optimizer.step()
"""
The Real Training Ends here.
This Part is just for visuals so that you now how far the Model is trained
and how much time is left for this Epoch
"""
current_time = time.time() - start_time
dt = current_time - last_time
list_dt.append(dt)
training_history[0].append(loss.item())
                _, preds = torch.max(outputs, 1)
                # Number of correct predictions in this batch (for accuracy plots)
                training_history[1].append(torch.sum(preds == labels.data).item())
avg_dt = sum(list_dt)/len(list_dt)
avg_loss = sum(training_history[0])/len(training_history[0])
Time_Estimate = (round((len(train_dataset)-count*batch_size)/batch_size)*avg_dt)/60**2
sys.stdout.write("\033[K")
print('Training Model | Estimated Left Time for this Epoch: ' + "%.2f h" % Time_Estimate + "| Current Average Loss: " + "%.5f" % avg_loss + '| Batches Left:'+str(round((len(train_dataset)-count*batch_size)/batch_size)), end='\r')
last_time = current_time
count += 1
"""
Every Epoch the Model gets Saved Plus the Training History containing all Loses and Accurarcys for every Batch
"""
sys.stdout.write("\033[K")
print(f'Epoch {epoch+1}/{num_epochs} Done, Loss: {sum(training_history[0])/len(training_history[0]):.4f}')
last_eb = [epoch+1,0]
with open(training_save_PATH, 'w') as f:
json.dump(last_eb, f)
torch.save(model.state_dict(), model_save_PATH)
with open(training_history_PATH, 'w') as f:
json.dump(training_history, f)
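            # For reference, the two JSON files written above have this layout:
            #   training_save:    [next_epoch, next_batch]
            #   training_history: [[loss per batch, ...], [correct predictions per batch, ...]]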
    except KeyboardInterrupt:
        """
        This part saves the model if the training gets cancelled at any time,
        i.e. if the program gets closed.
        """
        sys.stdout.write("\033[K")
        print("---------------------")
        print("[!!!] Training Interrupted, SAVING TRAINING [!!!]")
        # Save the current position so training can resume mid-epoch
        last_eb = [epoch, count]
        with open(training_save_PATH, 'w') as f:
            json.dump(last_eb, f)
        torch.save(model.state_dict(), model_save_PATH)
        print("[!!!] Training Saved [!!!]")
        print("[!!!] Epoch: " + str(epoch) + " Batch: " + str(count)+" [!!!]")
        with open(training_history_PATH, 'w') as f:
            json.dump(training_history, f)
"""
After Training save the model and Training Histroy
"""
sys.stdout.write("\033[K")
print("---------------------")
print(f'Finished Training, Loss: {sum(training_history[0])/len(training_history[0]):.4f}')
torch.save(model.state_dict(), model_save_PATH)
with open(training_history_PATH, 'w') as f:
json.dump(training_history, f)
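    """
    A minimal sketch (illustrative, not part of the training itself) of how the
    saved model could be loaded later for inference, assuming the same
    two-class ResNet-18 head defined above:

        model = torchvision.models.resnet18()
        model.fc = nn.Linear(model.fc.in_features, 2)
        model.load_state_dict(torch.load(model_save_PATH, map_location='cpu'))
        model.eval()
    """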