Authored by graykode, 2020-11-01 17:53:28 +0900
Commit ad248582abe296e2f15198a92d8d9d093ae6e47a (1 parent: 3d6b29a6)

(add) patch ids embedding roberta model
Showing 3 changed files with 297 additions and 32 deletions:
code2nl/customized_roberta.py
code2nl/model.py
code2nl/run.py
code2nl/customized_roberta.py  0 → 100644 (new file)
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""PyTorch RoBERTa model. """

import torch
import torch.nn as nn

from transformers.modeling_roberta import (
    create_position_ids_from_input_ids,
    RobertaPreTrainedModel,
    RobertaEncoder,
    RobertaPooler,
    BaseModelOutputWithPooling
)


class RobertaEmbeddings(nn.Module):
    """
    Same as BertEmbeddings with a tiny tweak for positional embeddings indexing.
    """

    # Copied from transformers.modeling_bert.BertEmbeddings.__init__
    def __init__(self, config):
        super().__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
        self.patch_type_embeddings = nn.Embedding(3, config.hidden_size)

        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        # position_ids (1, len position emb) is contiguous in memory and exported when serialized
        self.register_buffer("position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)))

        # End copy
        self.padding_idx = config.pad_token_id
        self.position_embeddings = nn.Embedding(
            config.max_position_embeddings, config.hidden_size, padding_idx=self.padding_idx
        )

    def forward(self, input_ids=None, patch_ids=None, token_type_ids=None, position_ids=None, inputs_embeds=None):
        if position_ids is None:
            if input_ids is not None:
                # Create the position ids from the input token ids. Any padded tokens remain padded.
                position_ids = create_position_ids_from_input_ids(input_ids, self.padding_idx).to(input_ids.device)
            else:
                position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds)

        # Copied from transformers.modeling_bert.BertEmbeddings.forward
        if input_ids is not None:
            input_shape = input_ids.size()
        else:
            input_shape = inputs_embeds.size()[:-1]

        seq_length = input_shape[1]

        if position_ids is None:
            position_ids = self.position_ids[:, :seq_length]

        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=self.position_ids.device)

        if inputs_embeds is None:
            inputs_embeds = self.word_embeddings(input_ids)
        position_embeddings = self.position_embeddings(position_ids)
        token_type_embeddings = self.token_type_embeddings(token_type_ids)

        embeddings = inputs_embeds + position_embeddings + token_type_embeddings
        if patch_ids is not None:
            patch_type_embeddings = self.patch_type_embeddings(patch_ids)
            embeddings += patch_type_embeddings
        embeddings = self.LayerNorm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings

    def create_position_ids_from_inputs_embeds(self, inputs_embeds):
        """We are provided embeddings directly. We cannot infer which are padded so just generate
        sequential position ids.

        :param torch.Tensor inputs_embeds:
        :return torch.Tensor:
        """
        input_shape = inputs_embeds.size()[:-1]
        sequence_length = input_shape[1]

        position_ids = torch.arange(
            self.padding_idx + 1, sequence_length + self.padding_idx + 1, dtype=torch.long, device=inputs_embeds.device
        )
        return position_ids.unsqueeze(0).expand(input_shape)


class RobertaModel(RobertaPreTrainedModel):
    """
    The model can behave as an encoder (with only self-attention) as well
    as a decoder, in which case a layer of cross-attention is added between
    the self-attention layers, following the architecture described in `Attention is all you need`_ by Ashish Vaswani,
    Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin.

    To behave as an decoder the model needs to be initialized with the
    :obj:`is_decoder` argument of the configuration set to :obj:`True`.
    To be used in a Seq2Seq model, the model needs to initialized with both :obj:`is_decoder`
    argument and :obj:`add_cross_attention` set to :obj:`True`; an
    :obj:`encoder_hidden_states` is then expected as an input to the forward pass.

    .. _`Attention is all you need`:
        https://arxiv.org/abs/1706.03762
    """

    authorized_missing_keys = [r"position_ids"]

    # Copied from transformers.modeling_bert.BertModel.__init__ with Bert->Roberta
    def __init__(self, config, add_pooling_layer=True):
        super().__init__(config)
        self.config = config

        self.embeddings = RobertaEmbeddings(config)
        self.encoder = RobertaEncoder(config)

        self.pooler = RobertaPooler(config) if add_pooling_layer else None

        self.init_weights()

    def get_input_embeddings(self):
        return self.embeddings.word_embeddings

    def set_input_embeddings(self, value):
        self.embeddings.word_embeddings = value

    def _prune_heads(self, heads_to_prune):
        """Prunes heads of the model.
        heads_to_prune: dict of {layer_num: list of heads to prune in this layer}
        See base class PreTrainedModel
        """
        for layer, heads in heads_to_prune.items():
            self.encoder.layer[layer].attention.prune_heads(heads)

    # Copied from transformers.modeling_bert.BertModel.forward
    def forward(
        self,
        input_ids=None,
        patch_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        r"""
        encoder_hidden_states (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
            Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention
            if the model is configured as a decoder.
        encoder_attention_mask (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
            Mask to avoid performing attention on the padding token indices of the encoder input. This mask
            is used in the cross-attention if the model is configured as a decoder.
            Mask values selected in ``[0, 1]``:
            ``1`` for tokens that are NOT MASKED, ``0`` for MASKED tokens.
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is not None:
            input_shape = input_ids.size()
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if attention_mask is None:
            attention_mask = torch.ones(input_shape, device=device)
        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

        # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
        # ourselves in which case we just need to make it broadcastable to all heads.
        extended_attention_mask: torch.Tensor = self.get_extended_attention_mask(attention_mask, input_shape, device)

        # If a 2D or 3D attention mask is provided for the cross-attention
        # we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length]
        if self.config.is_decoder and encoder_hidden_states is not None:
            encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states.size()
            encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length)
            if encoder_attention_mask is None:
                encoder_attention_mask = torch.ones(encoder_hidden_shape, device=device)
            encoder_extended_attention_mask = self.invert_attention_mask(encoder_attention_mask)
        else:
            encoder_extended_attention_mask = None

        # Prepare head mask if needed
        # 1.0 in head_mask indicate we keep the head
        # attention_probs has shape bsz x n_heads x N x N
        # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
        # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
        head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)

        embedding_output = self.embeddings(
            input_ids=input_ids, patch_ids=patch_ids, position_ids=position_ids,
            token_type_ids=token_type_ids, inputs_embeds=inputs_embeds
        )
        encoder_outputs = self.encoder(
            embedding_output,
            attention_mask=extended_attention_mask,
            head_mask=head_mask,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_extended_attention_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = encoder_outputs[0]
        pooled_output = self.pooler(sequence_output) if self.pooler is not None else None

        if not return_dict:
            return (sequence_output, pooled_output) + encoder_outputs[1:]

        return BaseModelOutputWithPooling(
            last_hidden_state=sequence_output,
            pooler_output=pooled_output,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
        )
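The new embedding layer stacks a third learned table on top of RoBERTa's word, position and token-type embeddings: a 3-entry patch-type table indexed by patch_ids, where run.py below uses 1 for tokens from the added side of a diff, 2 for the deleted side, and 0 for padding. Below is a minimal, hedged sketch of driving the customized model directly; the tiny config and the sample strings are illustrative and not part of the commit.

# Hypothetical usage sketch (not part of the commit): a small randomly initialized
# config so the example runs on CPU; real runs load pretrained weights via run.py.
import torch
from transformers import RobertaConfig, RobertaTokenizer
from customized_roberta import RobertaModel   # the file added above (run from code2nl/)

config = RobertaConfig(vocab_size=50265, hidden_size=64, num_hidden_layers=2,
                       num_attention_heads=2, intermediate_size=128)
model = RobertaModel(config)
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

added = tokenizer.tokenize("return a + b")     # tokens from the '+' side of a diff
deleted = tokenizer.tokenize("return a - b")   # tokens from the '-' side of a diff
tokens = [tokenizer.cls_token] + added + [tokenizer.sep_token] + deleted + [tokenizer.sep_token]
patch_ids = [1] * (len(added) + 2) + [2] * (len(deleted) + 1)   # 1 = added span, 2 = deleted span, 0 = padding

input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)])
patch_ids = torch.tensor([patch_ids])
outputs = model(input_ids=input_ids, patch_ids=patch_ids)
print(outputs[0].shape)   # (1, seq_len, hidden_size); patch-type embeddings are added before LayerNorm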
code2nl/model.py

@@ -51,8 +51,8 @@ class Seq2Seq(nn.Module):
         self._tie_or_clone_weights(self.lm_head, self.encoder.embeddings.word_embeddings)

-    def forward(self, source_ids=None, source_mask=None, target_ids=None, target_mask=None, args=None):
-        outputs = self.encoder(source_ids, attention_mask=source_mask)
+    def forward(self, source_ids=None, source_mask=None, target_ids=None, target_mask=None, patch_ids=None, args=None):
+        outputs = self.encoder(source_ids, attention_mask=source_mask, patch_ids=patch_ids)
         encoder_output = outputs[0].permute([1, 0, 2]).contiguous()
         if target_ids is not None:
             attn_mask = -1e4 * (1 - self.bias[:target_ids.shape[1], :target_ids.shape[1]])
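Two context lines in this hunk are worth unpacking: the encoder output is permuted from (batch, seq_len, hidden) to (seq_len, batch, hidden) because the nn.TransformerDecoder built in run.py expects sequence-first inputs, and attn_mask is a causal mask derived from self.bias. The stand-alone illustration below assumes self.bias is a lower-triangular ones buffer, which is not shown in this diff.

import torch

# Sequence-first memory for nn.TransformerDecoder (mirrors the permute([1, 0, 2]) above).
encoder_hidden = torch.randn(2, 8, 64)                    # (batch, seq_len, hidden)
memory = encoder_hidden.permute([1, 0, 2]).contiguous()   # (seq_len, batch, hidden)
print(memory.shape)                                        # torch.Size([8, 2, 64])

# Hypothetical stand-in for self.bias: a lower-triangular causal buffer.
bias = torch.tril(torch.ones(6, 6))
target_len = 4
attn_mask = -1e4 * (1 - bias[:target_len, :target_len])
print(attn_mask)   # 0 on/below the diagonal, -1e4 above: future target positions are masked out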
code2nl/run.py
@@ -35,10 +35,11 @@ from itertools import cycle
 import torch.nn as nn
 from model import Seq2Seq
 from tqdm import tqdm, trange
+from customized_roberta import RobertaModel
 from torch.utils.data import DataLoader, Dataset, SequentialSampler, RandomSampler, TensorDataset
 from torch.utils.data.distributed import DistributedSampler
 from transformers import (WEIGHTS_NAME, AdamW, get_linear_schedule_with_warmup,
-                          RobertaConfig, RobertaModel, RobertaTokenizer)
+                          RobertaConfig, RobertaTokenizer)
 MODEL_CLASSES = {'roberta': (RobertaConfig, RobertaModel, RobertaTokenizer)}

 logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
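With this import swap, the 'roberta' entry in MODEL_CLASSES now resolves to the local customized RobertaModel (with patch_ids support) instead of the stock Hugging Face class. Below is a hedged sketch of how the tuple is presumably consumed further down in main(); the checkpoint name is illustrative.

from transformers import RobertaConfig, RobertaTokenizer
from customized_roberta import RobertaModel   # local class added in this commit

MODEL_CLASSES = {'roberta': (RobertaConfig, RobertaModel, RobertaTokenizer)}

config_class, model_class, tokenizer_class = MODEL_CLASSES['roberta']
config = config_class.from_pretrained('microsoft/codebert-base')        # illustrative checkpoint
tokenizer = tokenizer_class.from_pretrained('microsoft/codebert-base')  # illustrative checkpoint
encoder = model_class(config=config)   # randomly initialized here; weights can be restored via args.load_model_path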
@@ -50,11 +51,13 @@ class Example(object):
     """A single training/test example."""
     def __init__(self,
                  idx,
-                 source,
+                 added,
+                 deleted,
                  target,
                  ):
         self.idx = idx
-        self.source = source
+        self.added = added
+        self.deleted = deleted
         self.target = target

 def read_examples(filename):
@@ -66,16 +69,13 @@ def read_examples(filename):
             js = json.loads(line)
             if 'idx' not in js:
                 js['idx'] = idx
-            code = ' '.join(js['code_tokens']).replace('\n', ' ')
-            code = ' '.join(code.strip().split())
-            nl = ' '.join(js['docstring_tokens']).replace('\n', '')
-            nl = ' '.join(nl.strip().split())
             examples.append(
                 Example(
                     idx=idx,
-                    source=code,
-                    target=nl,
+                    added=js['added'],
+                    deleted=js['deleted'],
+                    target=js['msg'],
                 )
             )
     return examples
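read_examples now expects each JSON line to carry added, deleted and msg fields; the old code_tokens/docstring_tokens pipeline is dropped. Below is a hypothetical record, assuming the fields hold pre-tokenized lists as the concatenation in convert_examples_to_features further down implies; the token values are purely illustrative.

import json

# Hypothetical training line; the field names match the keys read above.
line = json.dumps({
    "added": ["def", "Ġadd", "(", "a", ",", "Ġb", ")", ":", "Ġreturn", "Ġa", "Ġ+", "Ġb"],
    "deleted": ["def", "Ġadd", "(", "a", ",", "Ġb", ")", ":", "Ġpass"],
    "msg": ["Add", "Ġadd", "Ġfunction"]
})
js = json.loads(line)               # as in read_examples
print(js["added"], js["deleted"], js["msg"])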
@@ -88,13 +88,15 @@ class InputFeatures(object):
                  target_ids,
                  source_mask,
                  target_mask,
+                 patch_ids,
                  ):
         self.example_id = example_id
         self.source_ids = source_ids
         self.target_ids = target_ids
         self.source_mask = source_mask
         self.target_mask = target_mask
+        self.patch_ids = patch_ids
@@ -102,19 +104,26 @@ def convert_examples_to_features(examples, tokenizer, args,stage=None):
     features = []
     for example_index, example in enumerate(examples):
         #source
-        source_tokens = tokenizer.tokenize(example.source)[:args.max_source_length - 2]
-        source_tokens = [tokenizer.cls_token] + source_tokens + [tokenizer.sep_token]
-        source_ids = tokenizer.convert_tokens_to_ids(source_tokens)
+        added_tokens = [tokenizer.cls_token] + example.added + [tokenizer.sep_token]
+        deleted_tokens = example.deleted + [tokenizer.sep_token]
+        source_tokens = added_tokens + deleted_tokens
+        patch_ids = [1] * len(added_tokens) + [2] * len(deleted_tokens)
+        source_ids = tokenizer.convert_tokens_to_ids(source_tokens)
         source_mask = [1] * (len(source_tokens))
         padding_length = args.max_source_length - len(source_ids)
         source_ids += [tokenizer.pad_token_id] * padding_length
+        patch_ids += [0] * padding_length
         source_mask += [0] * padding_length
         assert len(source_ids) == args.max_source_length
         assert len(source_mask) == args.max_source_length
+        assert len(patch_ids) == args.max_source_length

         #target
         if stage == "test":
             target_tokens = tokenizer.tokenize("None")
         else:
-            target_tokens = tokenizer.tokenize(example.target)[:args.max_target_length - 2]
+            target_tokens = (example.target)[:args.max_target_length - 2]
         target_tokens = [tokenizer.cls_token] + target_tokens + [tokenizer.sep_token]
         target_ids = tokenizer.convert_tokens_to_ids(target_tokens)
         target_mask = [1] * len(target_ids)
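To make the new source-side layout concrete, here is a small worked example of the logic above with toy token lists and a toy max_source_length; the real values come from the tokenizer and args.

# Toy stand-ins for tokenizer.cls_token / tokenizer.sep_token and args.max_source_length.
cls_token, sep_token = "<s>", "</s>"
added = ["Ġreturn", "Ġa", "Ġ+", "Ġb"]
deleted = ["Ġreturn", "Ġa", "Ġ-", "Ġb"]
max_source_length = 12

added_tokens = [cls_token] + added + [sep_token]        # 6 tokens, patch id 1
deleted_tokens = deleted + [sep_token]                   # 5 tokens, patch id 2
source_tokens = added_tokens + deleted_tokens            # 11 tokens in total
patch_ids = [1] * len(added_tokens) + [2] * len(deleted_tokens)
source_mask = [1] * len(source_tokens)

padding_length = max_source_length - len(source_tokens)  # source_ids would be padded with pad_token_id
patch_ids += [0] * padding_length                         # padding positions get patch id 0
source_mask += [0] * padding_length

print(patch_ids)    # [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0]
print(source_mask)  # [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]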
@@ -129,6 +138,7 @@ def convert_examples_to_features(examples, tokenizer, args,stage=None):
                 logger.info("source_tokens: {}".format([x.replace('\u0120', '_') for x in source_tokens]))
                 logger.info("source_ids: {}".format(' '.join(map(str, source_ids))))
+                logger.info("patch_ids: {}".format(' '.join(map(str, patch_ids))))
                 logger.info("source_mask: {}".format(' '.join(map(str, source_mask))))
                 logger.info("target_tokens: {}".format([x.replace('\u0120', '_') for x in target_tokens]))

@@ -142,6 +152,7 @@ def convert_examples_to_features(examples, tokenizer, args,stage=None):
                 target_ids,
                 source_mask,
                 target_mask,
+                patch_ids,
             )
         )
     return features
@@ -255,7 +266,7 @@ def main():
     tokenizer = tokenizer_class.from_pretrained(args.tokenizer_name if args.tokenizer_name else args.model_name_or_path, do_lower_case=args.do_lower_case)

     #budild model
-    encoder = model_class.from_pretrained(args.model_name_or_path, config=config)
+    encoder = model_class(config=config)
     decoder_layer = nn.TransformerDecoderLayer(d_model=config.hidden_size, nhead=config.num_attention_heads)
     decoder = nn.TransformerDecoder(decoder_layer, num_layers=6)
     model = Seq2Seq(encoder=encoder, decoder=decoder, config=config,

@@ -263,7 +274,7 @@ def main():
                     sos_id=tokenizer.cls_token_id, eos_id=tokenizer.sep_token_id)
     if args.load_model_path is not None:
         logger.info("reload model from {}".format(args.load_model_path))
-        model.load_state_dict(torch.load(args.load_model_path))
+        model.load_state_dict(torch.load(args.load_model_path), strict=False)

     model.to(device)
     if args.local_rank != -1:
@@ -289,7 +300,8 @@ def main():
         all_source_mask = torch.tensor([f.source_mask for f in train_features], dtype=torch.long)
         all_target_ids = torch.tensor([f.target_ids for f in train_features], dtype=torch.long)
         all_target_mask = torch.tensor([f.target_mask for f in train_features], dtype=torch.long)
-        train_data = TensorDataset(all_source_ids, all_source_mask, all_target_ids, all_target_mask)
+        all_patch_ids = torch.tensor([f.patch_ids for f in train_features], dtype=torch.long)
+        train_data = TensorDataset(all_source_ids, all_source_mask, all_target_ids, all_target_mask, all_patch_ids)

         if args.local_rank == -1:
             train_sampler = RandomSampler(train_data)

@@ -327,8 +339,9 @@ def main():
         for step in bar:
             batch = next(train_dataloader)
             batch = tuple(t.to(device) for t in batch)
-            source_ids, source_mask, target_ids, target_mask = batch
-            loss, _, _ = model(source_ids=source_ids, source_mask=source_mask, target_ids=target_ids, target_mask=target_mask)
+            source_ids, source_mask, target_ids, target_mask, patch_ids = batch
+            loss, _, _ = model(source_ids=source_ids, source_mask=source_mask, target_ids=target_ids, target_mask=target_mask, patch_ids=patch_ids)

             if args.n_gpu > 1:
                 loss = loss.mean()  # mean() to average on multi-gpu.
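Because all_patch_ids is appended as a fifth tensor to the TensorDataset, every batch drawn from the DataLoader now unpacks into five tensors, matching the new model signature. A self-contained sketch with dummy data:

import torch
from torch.utils.data import DataLoader, RandomSampler, TensorDataset

# Dummy tensors with the same roles as the ones built from train_features above.
n, src_len, tgt_len = 4, 8, 6
all_source_ids = torch.randint(5, 100, (n, src_len))
all_source_mask = torch.ones(n, src_len, dtype=torch.long)
all_target_ids = torch.randint(5, 100, (n, tgt_len))
all_target_mask = torch.ones(n, tgt_len, dtype=torch.long)
all_patch_ids = torch.randint(0, 3, (n, src_len))      # values in {0, 1, 2}

train_data = TensorDataset(all_source_ids, all_source_mask,
                           all_target_ids, all_target_mask, all_patch_ids)
loader = DataLoader(train_data, sampler=RandomSampler(train_data), batch_size=2)

for batch in loader:
    source_ids, source_mask, target_ids, target_mask, patch_ids = batch   # five tensors now
    print(source_ids.shape, patch_ids.shape)
    break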
@@ -363,7 +376,8 @@ def main():
                 all_source_mask = torch.tensor([f.source_mask for f in eval_features], dtype=torch.long)
                 all_target_ids = torch.tensor([f.target_ids for f in eval_features], dtype=torch.long)
                 all_target_mask = torch.tensor([f.target_mask for f in eval_features], dtype=torch.long)
-                eval_data = TensorDataset(all_source_ids, all_source_mask, all_target_ids, all_target_mask)
+                all_patch_ids = torch.tensor([f.patch_ids for f in eval_features], dtype=torch.long)
+                eval_data = TensorDataset(all_source_ids, all_source_mask, all_target_ids, all_target_mask, all_patch_ids)
                 dev_dataset['dev_loss'] = eval_examples, eval_data
                 eval_sampler = SequentialSampler(eval_data)
                 eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size)

@@ -377,11 +391,11 @@ def main():
             eval_loss, tokens_num = 0, 0
             for batch in eval_dataloader:
                 batch = tuple(t.to(device) for t in batch)
-                source_ids, source_mask, target_ids, target_mask = batch
+                source_ids, source_mask, target_ids, target_mask, patch_ids = batch

                 with torch.no_grad():
                     _, loss, num = model(source_ids=source_ids, source_mask=source_mask,
-                                         target_ids=target_ids, target_mask=target_mask)
+                                         target_ids=target_ids, target_mask=target_mask, patch_ids=patch_ids)
                 eval_loss += loss.sum().item()
                 tokens_num += num.sum().item()
             #Pring loss of dev dataset
@@ -423,7 +437,8 @@ def main():
                     eval_features = convert_examples_to_features(eval_examples, tokenizer, args, stage='test')
                     all_source_ids = torch.tensor([f.source_ids for f in eval_features], dtype=torch.long)
                     all_source_mask = torch.tensor([f.source_mask for f in eval_features], dtype=torch.long)
-                    eval_data = TensorDataset(all_source_ids, all_source_mask)
+                    all_patch_ids = torch.tensor([f.patch_ids for f in eval_features], dtype=torch.long)
+                    eval_data = TensorDataset(all_source_ids, all_source_mask, all_patch_ids)
                     dev_dataset['dev_bleu'] = eval_examples, eval_data

@@ -435,9 +450,9 @@ def main():
                 p = []
                 for batch in eval_dataloader:
                     batch = tuple(t.to(device) for t in batch)
-                    source_ids, source_mask = batch
+                    source_ids, source_mask, patch_ids = batch
                     with torch.no_grad():
-                        preds = model(source_ids=source_ids, source_mask=source_mask)
+                        preds = model(source_ids=source_ids, source_mask=source_mask, patch_ids=patch_ids)
                         for pred in preds:
                             t = pred[0].cpu().numpy()
                             t = list(t)
@@ -481,7 +496,8 @@ def main():
         eval_features = convert_examples_to_features(eval_examples, tokenizer, args, stage='test')
         all_source_ids = torch.tensor([f.source_ids for f in eval_features], dtype=torch.long)
         all_source_mask = torch.tensor([f.source_mask for f in eval_features], dtype=torch.long)
-        eval_data = TensorDataset(all_source_ids, all_source_mask)
+        all_patch_ids = torch.tensor([f.patch_ids for f in eval_features], dtype=torch.long)
+        eval_data = TensorDataset(all_source_ids, all_source_mask, all_patch_ids)

         # Calculate bleu
         eval_sampler = SequentialSampler(eval_data)

@@ -491,9 +507,9 @@ def main():
             p = []
             for batch in tqdm(eval_dataloader, total=len(eval_dataloader)):
                 batch = tuple(t.to(device) for t in batch)
-                source_ids, source_mask = batch
+                source_ids, source_mask, patch_ids = batch
                 with torch.no_grad():
-                    preds = model(source_ids=source_ids, source_mask=source_mask)
+                    preds = model(source_ids=source_ids, source_mask=source_mask, patch_ids=patch_ids)
                     for pred in preds:
                         t = pred[0].cpu().numpy()
                         t = list(t)