Source code for crowdcount.engine.train

# -*- coding: utf-8 -*-
import torch
from torch.utils.data import DataLoader, Dataset
import torch.optim
import torch.nn
from torch.optim.lr_scheduler import StepLR
import math


def train(model, train_set: Dataset, test_set: Dataset, train_loss, test_loss,
          cuda_num=[0], optim="Adam", scheduler_flag=True, learning_rate=1e-5,
          weight_decay=1e-4, train_batch=1, test_batch=1, num_worker=8,
          epoch_num=2000, learning_decay=0.995, saver=None, enlarge_num=1):
    """Start training.

    Args:
        model (torch.nn.Module): the model to train.
        train_set (torch.utils.data.Dataset or object): train dataset, wrapped in a
            torch.utils.data.DataLoader inside this function.
        test_set (torch.utils.data.Dataset or object): test dataset, wrapped in a
            torch.utils.data.DataLoader inside this function.
        train_loss (object): train loss function constructed from crowdcount.utils.
        test_loss (object): test loss function constructed from crowdcount.utils.
        cuda_num (list, optional): CUDA device ids (default: [0]).
        optim (str, optional): optimizer, "Adam" | "SGD"; "Adam" selects torch.optim.Adam,
            "SGD" selects torch.optim.SGD (default: "Adam").
        scheduler_flag (bool, optional): if True, the learning rate decays every epoch
            by learning_decay (default: True).
        learning_rate (float, optional): learning rate used by the optimizer (default: 1e-5).
        weight_decay (float, optional): weight decay (L2 penalty) (default: 1e-4).
        train_batch (int, optional): train batch size (default: 1).
        test_batch (int, optional): test batch size (default: 1).
        num_worker (int, optional): how many subprocesses to use for data loading;
            0 loads the data in the main process (default: 8).
        epoch_num (int, optional): how many epochs to train (default: 2000).
        learning_decay (float, optional): learning decay used by the scheduler (default: 0.995).
        saver (crowdcount.utils.Saver, optional): saves the model whenever the best MAE
            improves (default: None).
        enlarge_num (int, optional): the scale factor used to enlarge the density map
            (default: 1).
    """
    # "cuda: {0}" would produce the invalid device string "cuda: 0"; torch expects "cuda:0".
    device = "cuda:{}".format(cuda_num[0]) if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    if optim == "Adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    elif optim == "SGD":
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    else:
        raise ValueError("optim must be 'Adam' or 'SGD', got {!r}".format(optim))
    if scheduler_flag:
        scheduler = StepLR(optimizer, step_size=1, gamma=learning_decay)
    if len(cuda_num) > 1:
        model = torch.nn.DataParallel(model, device_ids=cuda_num)
    train_loader = DataLoader(dataset=train_set, batch_size=train_batch, shuffle=True, num_workers=num_worker)
    test_loader = DataLoader(dataset=test_set, batch_size=test_batch, shuffle=False, num_workers=num_worker)
    min_mae, min_mse = 1e10, 1e10
    for epoch in range(epoch_num):
        sum_loss, temp_loss, sum_mae, sum_mse, sum_output, sum_gt = [0.0] * 6
        # Training phase.
        model.train()
        for i, (img, ground_truth) in enumerate(train_loader):
            img = img.float().to(device)
            ground_truth = ground_truth.float().to(device)
            optimizer.zero_grad()
            output = model(img)
            # Detach to Python floats so the autograd graph is not retained
            # across iterations just for logging.
            sum_output += float(torch.sum(output))
            sum_gt += float(torch.sum(ground_truth))
            loss = train_loss(output, ground_truth)
            loss.backward()
            optimizer.step()
            sum_loss += float(loss)
            # Log running statistics every 10 batches and at the end of the epoch.
            if i % 10 == 9 or i == len(train_loader) - 1:
                window = i % 10 + 1  # batches since the last log line (may be < 10 at epoch end)
                print("| epoch: {} / {} | batch: {} / {} | loss: {:.6f} |".format(
                    epoch, epoch_num, i + 1, len(train_loader),
                    (sum_loss - temp_loss) / window))
                print("| Train | lr: %.8f | output: %.1f | gt: %.1f |" % (
                    optimizer.param_groups[0]['lr'],
                    sum_output / (window * train_batch * enlarge_num),
                    sum_gt / (window * train_batch * enlarge_num)))
                print("------------------------------------------------------")
                sum_output, sum_gt = 0.0, 0.0
                temp_loss = sum_loss
        if scheduler_flag:
            scheduler.step()
        # Evaluation phase: accumulate per-batch MAE and squared error.
        model.eval()
        with torch.no_grad():
            for img, ground_truth in test_loader:
                img = img.float().to(device)
                ground_truth = ground_truth.float().to(device)
                output = model(img)
                mae, mse = test_loss(output, ground_truth)
                sum_mae += float(mae)
                sum_mse += float(mse)
        avg_mae = sum_mae / len(test_loader)
        avg_mse = math.sqrt(sum_mse / len(test_loader))
        # Keep the best checkpoint by MAE.
        if avg_mae < min_mae:
            min_mae, min_mse = avg_mae, avg_mse
            if saver is not None:
                saver.save(model, "mae_{mae}_mse_{mse}".format(mae=min_mae, mse=min_mse))
        print("********************** test ************************")
        print("* mae:%.1f, mse:%.1f, best_mae:%.1f, best_mse:%.1f *" % (avg_mae, avg_mse, min_mae, min_mse))
        print("* average train loss is {:.6f} *".format(sum_loss / len(train_loader)))
        print("****************************************************")
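
Below is a minimal, hypothetical usage sketch, not part of crowdcount: RandomCrowdDataset, the toy model, and the loss callables toy_train_loss / toy_test_loss are stand-ins that only honor the contracts the loop above relies on, namely that train_loss(output, gt) returns a scalar loss tensor for backward() and test_loss(output, gt) returns a per-batch (mae, mse) pair. The real loss utilities come from crowdcount.utils.

# --- Hypothetical usage sketch (stand-in names, not crowdcount APIs) ---
import torch
from torch.utils.data import Dataset


class RandomCrowdDataset(Dataset):
    """Stand-in dataset yielding (image, density_map) pairs."""

    def __len__(self):
        return 8

    def __getitem__(self, idx):
        return torch.rand(3, 64, 64), torch.rand(1, 64, 64)


def toy_train_loss(output, ground_truth):
    # Contract: return a scalar loss tensor for loss.backward().
    return torch.nn.functional.mse_loss(output, ground_truth)


def toy_test_loss(output, ground_truth):
    # Contract: return a per-batch (mae, mse) pair over predicted vs.
    # ground-truth counts; train() averages mae over batches and takes
    # the square root of the averaged mse.
    diff = torch.sum(output) - torch.sum(ground_truth)
    return torch.abs(diff), diff ** 2


if __name__ == "__main__":
    model = torch.nn.Conv2d(3, 1, kernel_size=3, padding=1)  # toy density regressor
    train(model, RandomCrowdDataset(), RandomCrowdDataset(),
          toy_train_loss, toy_test_loss, num_worker=0, epoch_num=1)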