Trainer( (net): Chem_VGP_NN_3tasks_query_context_posenc( (encoder): ResNeSt( (encoder): Sequential( (conv1): Sequential( (0): Conv2d(1, 32, kernel_size=(3, 3), stride=1, padding=same, bias=False) (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): ReLU(inplace=True) (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (5): ReLU(inplace=True) (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (maxpool): Identity() (layer1): Sequential( (0): Bottleneck( (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): SplAtConv2d( (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2, bias=False) (bn0): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (fc1): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1)) (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (fc2): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1)) (rsoftmax): rSoftMax() ) (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (downsample): Sequential( (0): AvgPool2d(kernel_size=1, stride=1, padding=0) (1): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): Bottleneck( (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): SplAtConv2d( (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2, bias=False) (bn0): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (fc1): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1)) (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (fc2): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1)) (rsoftmax): rSoftMax() ) (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) ) (2): Bottleneck( (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): SplAtConv2d( (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2, bias=False) (bn0): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (fc1): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1)) (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (fc2): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1)) (rsoftmax): rSoftMax() ) (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) ) ) (layer2): Sequential( (0): Bottleneck( (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (avd_layer): Identity() (conv2): SplAtConv2d( (conv): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2, bias=False) (bn0): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (fc1): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1)) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (fc2): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1)) (rsoftmax): rSoftMax() ) (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (downsample): Sequential( (0): Identity() (1): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): Bottleneck( (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): SplAtConv2d( (conv): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2, bias=False) (bn0): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (fc1): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1)) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (fc2): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1)) (rsoftmax): rSoftMax() ) (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) ) (2): Bottleneck( (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): SplAtConv2d( (conv): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2, bias=False) (bn0): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (fc1): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1)) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (fc2): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1)) (rsoftmax): rSoftMax() ) (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) ) (3): Bottleneck( (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): SplAtConv2d( (conv): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2, bias=False) (bn0): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (fc1): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1)) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (fc2): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1)) (rsoftmax): rSoftMax() ) (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) ) ) (reduction_head): Sequential( (0): Conv2d(512, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): ReLU(inplace=True) (3): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (5): ReLU(inplace=True) ) ) ) (pool): SpatialPyramidPool() (relu): ReLU(inplace=True) (flatten): Flatten(start_dim=1, end_dim=-1) (gnn_classifier): CustomGAT( (gat): CustomGATLayer( (leakyrelu): LeakyReLU(negative_slope=0.2) ) ) (classifier): Sequential( (0): Linear(in_features=682, out_features=512, bias=True) (1): ReLU() (2): Dropout(p=0.0, inplace=False) (3): Linear(in_features=512, out_features=37, bias=True) ) (segmenter): Sequential( (0): Linear(in_features=682, out_features=512, bias=True) (1): ReLU() (2): Dropout(p=0.0, inplace=False) (3): Linear(in_features=512, out_features=2, bias=True) ) (parser): Sequential( (0): Linear(in_features=682, out_features=512, bias=True) (1): ReLU() (2): Dropout(p=0.0, inplace=False) (3): Linear(in_features=512, out_features=2, bias=True) ) ) (criterion): CrossEntropyLoss() (criterion_sym): CrossEntropyLoss() (criterion_seg): CrossEntropyLoss() (criterion_rel): CrossEntropyLoss() ) Seed=0