I'm building a TFT forecasting model using PyTorch for the first time and having trouble extracting the predicted values along with their corresponding actual values from the output.
Ideally, I’d like to obtain a dataframe indexed by date with three columns: the actual value, the predicted value, and the Series identifier.
Any guidance would be greatly appreciated!
Thanks!
Here's my model setup:
# ---------------------------------------------------------------------------
# Data preparation: give every row a dense per-Series integer time index,
# which pytorch-forecasting's TimeSeriesDataSet requires as its time axis.
# ---------------------------------------------------------------------------
def _add_time_idx(df):
    """Derive an integer ``time_idx`` per Series from the frame's date index.

    ``rank(method='dense')`` yields consecutive integers (starting at 1) for
    each Series, which is the monotone integer axis TimeSeriesDataSet expects.
    The temporary Date column is dropped again before returning.
    """
    df = df.copy()
    df['Date'] = pd.to_datetime(df.index)
    df['time_idx'] = df.groupby('Series')['Date'].rank(method='dense').astype(int)
    return df.drop(columns=['Date'])


train = _add_time_idx(train)
validation = _add_time_idx(validation)
test = _add_time_idx(test)

max_encoder_length = 30    # number of past time steps fed to the encoder
max_prediction_length = 2  # forecast horizon (number of steps to predict)

training = TimeSeriesDataSet(
    train,
    time_idx="time_idx",
    target="y",
    group_ids=["Series"],
    static_categoricals=["Market", "Length", "Grade_Type", "Width", "Height"],
    time_varying_known_categoricals=[],
    time_varying_known_reals=time_varying + ['time_idx'],
    time_varying_unknown_reals=["y"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    allow_missing_timesteps=True,
    target_normalizer=None,  # NOTE(review): a per-Series GroupNormalizer usually helps — confirm
    add_relative_time_idx=True,  # add relative time index as a feature
    add_encoder_length=True,     # add encoder length as a feature
)

# Validation/test datasets reuse the training dataset's encoders and scalers;
# predict=True keeps only the final decoder window of each series.
validation_data = TimeSeriesDataSet.from_dataset(training, validation, predict=True)
test_data = TimeSeriesDataSet.from_dataset(training, test, predict=True)

batch_size = 64  # adjust to your system's capabilities
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
validation_dataloader = validation_data.to_dataloader(train=False, batch_size=batch_size, num_workers=0)
test_dataloader = test_data.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=1e-3,
    hidden_size=64,
    attention_head_size=15,  # NOTE(review): 1-4 heads is typical for TFT; 15 looks high — verify
    dropout=0.18,
    hidden_continuous_size=32,
    output_size=1,  # single point forecast, consistent with MAE (a point loss)
    loss=MAE(),
    log_interval=10,
    reduce_on_plateau_patience=2,
)

# ---------------------------------------------------------------------------
# Callbacks and trainer
# ---------------------------------------------------------------------------
early_stop_callback = EarlyStopping(monitor="val_loss", patience=5, mode="min", verbose=True)
lr_monitor = LearningRateMonitor(logging_interval="epoch")

trainer = pl.Trainer(
    max_epochs=20,
    accelerator="auto",
    gradient_clip_val=0.5,
    callbacks=[early_stop_callback, lr_monitor],
    enable_progress_bar=True,
)

trainer.fit(tft, train_dataloaders=train_dataloader, val_dataloaders=validation_dataloader)

# ---------------------------------------------------------------------------
# Extract predictions alongside actuals into a date-indexed dataframe.
# return_y attaches the matching ground truth; return_index attaches a
# DataFrame giving, per prediction row, the time_idx of the first predicted
# step and the Series id.
# ---------------------------------------------------------------------------
predictions = tft.predict(test_dataloader, return_y=True, return_index=True)

pred_values = predictions.output.cpu().numpy()   # expected shape (n_series, max_prediction_length) — confirm
actual_values = predictions.y[0].cpu().numpy()   # same shape as pred_values
pred_index = predictions.index                   # columns include 'time_idx' and 'Series'

rows = []
for i in range(len(pred_index)):
    series = pred_index.loc[i, 'Series']
    first_step = pred_index.loc[i, 'time_idx']
    for horizon in range(pred_values.shape[1]):
        rows.append({
            'Series': series,
            'time_idx': first_step + horizon,  # each horizon step advances the time index by one
            'predicted': pred_values[i, horizon],
            'actual': actual_values[i, horizon],
        })
result = pd.DataFrame(rows)

# Map time_idx back to the original dates (still held in test's index) so the
# final frame is indexed by date with columns: actual, predicted, Series.
date_lookup = test.copy()
date_lookup['Date'] = pd.to_datetime(date_lookup.index)
result = (
    result
    .merge(date_lookup[['Series', 'time_idx', 'Date']], on=['Series', 'time_idx'], how='left')
    .set_index('Date')[['actual', 'predicted', 'Series']]
)
print(result)