# Exploratory script: load a fine-tuned RoBERTa classifier and a ViT encoder as
# TF models and compare their transformer-block variables (counts and shapes).
import os
os.environ['TRANSFORMERS_CACHE'] = '/home/acd13578qu/data/.cache/huggingface'
from transformers import RobertaConfig, TFAutoModelForSequenceClassification, TFViTModel
from pprint import pprint
# TODO: Need to figure out how to view all the Tf layers
# This will let me see if they're in the right order/etc
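
# One possible way to address the TODO above (a sketch; show_tf_variables is a
# helper introduced here, not part of the original repo): once a Keras/TF model
# has been built, model.trainable_variables lists every weight in creation
# order, and each variable's scoped name encodes the layer hierarchy, so
# printing name/shape pairs shows whether the layers line up.
def show_tf_variables(model):
    """Print every trainable variable's scoped name and shape, in order."""
    for var in model.trainable_variables:
        print(var.name, tuple(var.shape))

# e.g. show_tf_variables(roberta_model) / show_tf_variables(vit_model) once the
# models are loaded below.
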
configuration = RobertaConfig()
roberta_model = TFAutoModelForSequenceClassification.from_pretrained(
    # '/home/acd13578qu/scratch/roberta_actual/checkpoints/checkpoint_best.pt',
    'textattack/roberta-base-RTE',
    from_pt=True)
# Transformer blocks of the RoBERTa encoder (one list entry per layer).
roberta_layers = roberta_model.layers[0].encoder.layer
vit_model = TFViTModel.from_pretrained('google/vit-base-patch16-224-in21k', from_pt=True)
# Transformer blocks of the ViT encoder (one list entry per layer).
vit_layers = vit_model.layers[0].encoder.layer
# for roberta_var, vit_var in zip(roberta_layers[0].trainable_variables, vit_layers[0].trainable_variables):
#     print(roberta_var.shape)
#     print(vit_var.shape)
#     print()
# Spot-check the first block: the key/value projections should each expose two
# trainable variables (kernel, then bias), and the bias shapes (index [1])
# should match between the RoBERTa and ViT encoders.
print(len(roberta_layers[0].attention.self_attention.key.trainable_variables))
print(len(vit_layers[0].attention.self_attention.key.trainable_variables))
print(roberta_layers[0].attention.self_attention.key.trainable_variables[1].shape)
print(vit_layers[0].attention.self_attention.key.trainable_variables[1].shape)
print(len(roberta_layers[0].attention.self_attention.value.trainable_variables))
print(len(vit_layers[0].attention.self_attention.value.trainable_variables))
print(roberta_layers[0].attention.self_attention.value.trainable_variables[1].shape)
print(vit_layers[0].attention.self_attention.value.trainable_variables[1].shape)
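
# To extend the spot checks above from one sub-layer to every transformer block
# (a sketch; compare_block_shapes is a helper introduced here, and zipping the
# variable lists assumes both blocks expose their weights in the same order,
# the same assumption the commented-out loop above makes):
def compare_block_shapes(roberta_blocks, vit_blocks):
    """Report any position where the two blocks' variable shapes differ."""
    for i, (r_block, v_block) in enumerate(zip(roberta_blocks, vit_blocks)):
        for r_var, v_var in zip(r_block.trainable_variables,
                                v_block.trainable_variables):
            if r_var.shape != v_var.shape:
                print(f'Layer {i}: shape mismatch')
                print(f'  {r_var.name}: {tuple(r_var.shape)}')
                print(f'  {v_var.name}: {tuple(v_var.shape)}')

# compare_block_shapes(roberta_layers, vit_layers)
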
# print('roberta layers:')
# print(len(roberta_layers.trainable_variables))
# print(len(roberta_layers.encoder.layer))
# print(len(vit_layers.trainable_variables))
# print(roberta_layers.trainable_variables[0].shape)
# print(vit_layers.trainable_variables[3].shape)
# print(len(vit_layers.encoder.layer))
# pprint(dir(roberta_model))
# pprint(dir(roberta_layers))
# print(roberta_model.summary())
# print('vit layers:')
# print(vit_layers)
# # print('roberta:')
# roberta_params = []
# roberta_shapes = []
# roberta_total_params = 0
# for name, param in roberta_model.named_parameters():
#     if param.requires_grad:
#         # print(str(name))
#         start = 1
#         roberta_shapes.append(param.data.size())
#         for elem in list(param.data.size()):
#             start *= elem
#         roberta_total_params += start
#         roberta_params.append(name)
# # print('vit:')
# vit_params = []
# vit_shapes = []
# vit_total_params = 0
# for name, param in vit_model.named_parameters():
#     if param.requires_grad:
#         # print(str(name))
#         start = 1
#         vit_shapes.append(param.data.size())
#         for elem in list(param.data.size()):
#             start *= elem
#         vit_total_params += start
#         vit_params.append(name)
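
# The two commented-out loops above use the PyTorch named_parameters() API,
# which the TF/Keras models loaded in this script don't provide. A rough TF
# analogue (a sketch; count_tf_params is a helper introduced here, not from the
# original repo) walks trainable_variables and multiplies out each shape.
def count_tf_params(model):
    """Collect variable names, shapes, and a total trainable-parameter count."""
    names, shapes, total = [], [], 0
    for var in model.trainable_variables:
        names.append(var.name)
        shapes.append(tuple(var.shape))
        n = 1
        for dim in var.shape.as_list():
            n *= dim
        total += n
    return names, shapes, total

# roberta_params, roberta_shapes, roberta_total_params = count_tf_params(roberta_model)
# vit_params, vit_shapes, vit_total_params = count_tf_params(vit_model)
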
# for i in range(5, len(roberta_params)):
#     if roberta_params[i] == vit_params[i-1]:
#         print(roberta_params[i])
#         print(roberta_shapes[i])
#         print(vit_params[i-1])
#         print(vit_shapes[i-1])
#         print()
# for roberta_param, vit_param, roberta_shape, vit_shape in zip(roberta_params, vit_params, roberta_shapes, vit_shapes):
#     if roberta_shape != vit_shape:
#         print('Mismatch!!')
#         print(roberta_shape)
#         print(vit_shape)
#         print(roberta_param)
#         print(vit_param)
#         print()