tested_positive (this is what we want to predict):单位为百分比,指检测结果为阳性的人所占的比例。
数据下载
To use the Kaggle API, sign up for a Kaggle account at https://www.kaggle.com. Then go to the ‘Account’ tab of your user profile (https://www.kaggle.com/<username>/account) and select ‘Create API Token’. This will trigger the download of kaggle.json, a file containing your API credentials. Place this file in the location ~/.kaggle/kaggle.json (on Windows in the location C:\Users\<Windows-username>\.kaggle\kaggle.json - you can check the exact location, sans drive, with echo %HOMEPATH%). You can define a shell environment variable KAGGLE_CONFIG_DIR to change this location to $KAGGLE_CONFIG_DIR/kaggle.json (on Windows it will be %KAGGLE_CONFIG_DIR%\kaggle.json).
def same_seed(seed):
    '''Fix the random number generator seeds for reproducibility.

    Args:
        seed: Integer seed applied to NumPy, to PyTorch, and (when CUDA is
            available) to every CUDA device.
    '''
    # Use deterministic convolution algorithms only
    # (a bool that, if True, causes cuDNN to only use deterministic convolution algorithms).
    torch.backends.cudnn.deterministic = True
    # Do not benchmark several convolution algorithms to pick the fastest one
    # (a bool that, if True, causes cuDNN to benchmark multiple convolution
    # algorithms and select the fastest).
    torch.backends.cudnn.benchmark = False
    # Seed the random number generators.
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
def train_valid_split(data_set, valid_ratio, seed):
    '''Split provided training data into a training set and a validation set.

    Args:
        data_set: Indexable collection of samples supporting ``len()``.
        valid_ratio: Fraction of the samples assigned to the validation set;
            the validation size is ``int(valid_ratio * len(data_set))``.
        seed: Seed for the generator that draws the random split.

    Returns:
        A ``(train, valid)`` tuple of NumPy arrays built from the two subsets.
    '''
    valid_set_size = int(valid_ratio * len(data_set))
    train_set_size = len(data_set) - valid_set_size
    # A dedicated seeded generator makes the split reproducible for a given seed.
    generator = torch.Generator().manual_seed(seed)
    train_set, valid_set = random_split(
        data_set, [train_set_size, valid_set_size], generator=generator
    )
    return np.array(train_set), np.array(valid_set)
def predict(test_loader, model, device):
    '''Run the model over every batch of ``test_loader`` and collect the outputs.

    Args:
        test_loader: Iterable yielding input batches (features only).
        model: Model to evaluate; it is switched to evaluation mode.
        device: Device each batch is moved to before the forward pass.

    Returns:
        NumPy array with the per-batch predictions concatenated along dim 0.
    '''
    model.eval()  # Set your model to evaluation mode.
    preds = []
    for x in tqdm(test_loader):
        x = x.to(device)  # Copy the batch to the target device.
        with torch.no_grad():  # Disable gradient tracking to reduce overhead.
            pred = model(x)
            # detach() yields a tensor outside the computation graph with the
            # same values; cpu() moves it to host memory.
            preds.append(pred.detach().cpu())
    preds = torch.cat(preds, dim=0).numpy()  # Join the per-batch predictions.
    return preds
Dataset
class COVID19Dataset(Dataset):
    '''Dataset holding feature tensors and, optionally, target tensors.

    x: Features.
    y: Targets, if none, do prediction.
    '''

    def __init__(self, x, y=None):
        # Without targets (prediction mode) y stays None; otherwise it is
        # converted to a float tensor just like the features.
        if y is None:
            self.y = y
        else:
            self.y = torch.FloatTensor(y)
        self.x = torch.FloatTensor(x)

    def __getitem__(self, idx):
        '''Fetch the sample for the given index.

        Returns ``x[idx]`` alone when there are no targets, otherwise the
        pair ``(x[idx], y[idx])``.
        '''
        if self.y is None:
            return self.x[idx]
        else:
            return self.x[idx], self.y[idx]

    def __len__(self):
        '''Return the number of samples (needed by DataLoader's default samplers).'''
        return len(self.x)
criterion = nn.MSELoss(reduction='mean') # Define your loss function, do not modify this.
# Define your optimization algorithm. # TODO: Please check https://pytorch.org/docs/stable/optim.html to get more available algorithms. # TODO: L2 regularization (optimizer(weight decay...) or implement by your self). optimizer = torch.optim.SGD(model.parameters(), lr=config['learning_rate'], momentum=config['momentum']) # 设置 optimizer 为SGD writer = SummaryWriter() # Writer of tensoboard.
ifnot os.path.isdir('./models'): os.mkdir('./models') # Create directory of saving models.
for epoch inrange(n_epochs): model.train() # Set your model to train mode. loss_record = [] # 初始化空列表,用于记录训练误差
# tqdm is a package to visualize your training progress. train_pbar = tqdm(train_loader, position=0, leave=True) # 让训练进度显示出来,可以去除这一行,然后将下面的 train_pbar 改成 train_loader(目的是尽量减少 jupyter notebook 的打印,因为如果这段代码在 kaggle 执行,在一定的输出后会报错: IOPub message rate exceeded...)
for x, y in train_pbar: optimizer.zero_grad() # Set gradient to zero. x, y = x.to(device), y.to(device) # Move your data to device. pred = model(x) # 等价于 model.forward(x) loss = criterion(pred, y) # 计算 pred 和 y 的均方误差 loss.backward() # Compute gradient(backpropagation). optimizer.step() # Update parameters. step += 1 loss_record.append(loss.detach().item()) # Display current epoch number and loss on tqdm progress bar. train_pbar.set_description(f'Epoch [{epoch+1}/{n_epochs}]') train_pbar.set_postfix({'loss': loss.detach().item()})
model.eval() # Set your model to evaluation mode. loss_record = [] # 初始化空列表,用于记录验证误差 for x, y in valid_loader: x, y = x.to(device), y.to(device) with torch.no_grad(): pred = model(x) loss = criterion(pred, y)
if mean_valid_loss < best_loss: best_loss = mean_valid_loss torch.save(model.state_dict(), config['save_path']) # Save your best model print('Saving model with loss {:.3f}...'.format(best_loss)) early_stop_count = 0 else: early_stop_count += 1
if early_stop_count >= config['early_stop']: print('\nModel is not improving, so we halt the training session.') return