graykode

(fixed) blank space is added in code tokens

@@ -59,7 +59,7 @@ def jobs(repo, args):
     for line, code in mod.diff_parsed["deleted"]:
         deleted.extend(args.tokenizer.tokenize(code))
 
-    if len(added) + len(deleted) <= args.max_source_length:
+    if added and deleted and len(added) + len(deleted) <= args.max_source_length - 3:
         with jsonlines.open(args.output_file, mode="a") as writer:
             writer.write(
                 {
@@ -105,7 +105,7 @@ def main(args):
     )
     write_jsonl(
         data[int(n_data * 0.9):int(n_data * 0.95)],
-        path=args.output_dir, mode='validation'
+        path=args.output_dir, mode='valid'
     )
     write_jsonl(
         data[int(n_data * 0.95):],
......