# Two successive "add, then layer_norm" updates of Z_encoder_decoder.
# layer_norm, Z and Z_self_attention are defined outside this fragment.
#
# Step 1: add Z to the current Z_encoder_decoder, pass the sum through
# layer_norm, and store the result back in Z_encoder_decoder.
Z_encoder_decoder = layer_norm(Z_encoder_decoder + Z)
# Step 2: add Z_self_attention to the step-1 result and pass that sum through
# layer_norm again. Order matters: this line reads the value written above.
# NOTE(review): as written, layer_norm is applied twice, with two different
# addends (Z, then Z_self_attention). If these names follow the usual
# Transformer decoder layout (presumably Z is the sub-layer input and
# Z_self_attention is the self-attention output -- confirm), a single
# residual add per sub-layer would be expected, so these two lines may be
# alternative versions of one statement rather than sequential steps.
# TODO confirm against the surrounding code before changing anything.
Z_encoder_decoder = layer_norm(Z_encoder_decoder + Z_self_attention)