Committed by
GitHub
Merge pull request #1 from graykode/0.1.0
JavaScript Language is supported!!
Showing
18 changed files
with
658 additions
and
469 deletions
... | @@ -2,12 +2,15 @@ language: python | ... | @@ -2,12 +2,15 @@ language: python |
2 | python: | 2 | python: |
3 | - "3.6" | 3 | - "3.6" |
4 | 4 | ||
5 | +env: | ||
6 | + - LANGUAGE="py" | ||
7 | + | ||
5 | services: | 8 | services: |
6 | - docker | 9 | - docker |
7 | 10 | ||
8 | before_install: | 11 | before_install: |
9 | - - docker pull graykode/commit-autosuggestions | 12 | + - docker pull graykode/commit-autosuggestions:${LANGUAGE} |
10 | - - docker run -it -d -p 5000:5000 --restart always graykode/commit-autosuggestions | 13 | + - docker run -it -d -p 5000:5000 --restart always graykode/commit-autosuggestions:${LANGUAGE} |
11 | 14 | ||
12 | # command to install dependencies | 15 | # command to install dependencies |
13 | install: | 16 | install: | ... | ... |
... | @@ -46,20 +46,18 @@ Recommended Commit Message : Remove unused imports | ... | @@ -46,20 +46,18 @@ Recommended Commit Message : Remove unused imports |
46 | To solve this problem, use a new embedding called [`patch_type_embeddings`](https://github.com/graykode/commit-autosuggestions/blob/master/commit/model/diff_roberta.py#L40) that can distinguish added and deleted, just as XLM (Lample et al., 2019) used language embeddings. (1 for added, 2 for deleted.) | 46 | To solve this problem, use a new embedding called [`patch_type_embeddings`](https://github.com/graykode/commit-autosuggestions/blob/master/commit/model/diff_roberta.py#L40) that can distinguish added and deleted, just as XLM (Lample et al., 2019) used language embeddings. (1 for added, 2 for deleted.) |
47 | 47 | ||
48 | ### Language support | 48 | ### Language support |
49 | -| Language | Added | Diff | | 49 | +| Language | Added | Diff | Data(Only Diff) | Weights | |
50 | -| :------------- | :---: | :---:| | 50 | +| :------------- | :---: | :---:| :---: | :---:| |
51 | -| Python | ✅ | ✅ | | 51 | +| Python | ✅ | ✅ | [423k](https://drive.google.com/drive/folders/1_8lQmzTH95Nc-4MKd1RP3x4BVc8tBA6W?usp=sharing) | [Link](https://drive.google.com/drive/folders/1OwM7_FiLiwVJAhAanBPWtPw3Hz3Dszbh?usp=sharing) | |
52 | -| JavaScript | ⬜ | ⬜ | | 52 | +| JavaScript | ✅ | ✅ | [514k](https://drive.google.com/drive/folders/1-Hv0VZWSAGqs-ewNT6NhLKEqDH2oa1az?usp=sharing) | [Link](https://drive.google.com/drive/folders/1Jw8vXfxUXsfElga_Gi6e7Uhfc_HlmOuD?usp=sharing) | |
53 | -| Go | ⬜ | ⬜ | | 53 | +| Go | ⬜ | ⬜ | ⬜ | ⬜ | |
54 | -| JAVA | ⬜ | ⬜ | | 54 | +| JAVA | ⬜ | ⬜ | ⬜ | ⬜ | |
55 | -| Ruby | ⬜ | ⬜ | | 55 | +| Ruby | ⬜ | ⬜ | ⬜ | ⬜ | |
56 | -| PHP | ⬜ | ⬜ | | 56 | +| PHP | ⬜ | ⬜ | ⬜ | ⬜ | |
57 | * ✅ — Supported | 57 | * ✅ — Supported |
58 | -* 🔶 — Partial support | ||
59 | -* 🚧 — Under development | ||
60 | * ⬜ - N/A ️ | 58 | * ⬜ - N/A ️ |
61 | 59 | ||
62 | -We plan to slowly conquer languages that are not currently supported. However, I also need to use expensive GPU instances of AWS or GCP to train about the above languages. Please do a simple sponsor for this! | 60 | +We plan to slowly conquer languages that are not currently supported. However, I also need to use expensive GPU instances of AWS or GCP to train about the above languages. Please do a simple sponsor for this! The data for the Added model is the [CodeSearchNet dataset](https://drive.google.com/uc?id=1rd2Tc6oUWBo7JouwexW3ksQ0PaOhUr6h). |
63 | 61 | ||
64 | ### Quick Start | 62 | ### Quick Start |
65 | To run this project, you need a flask-based inference server (GPU) and a client (commit module). If you don't have a GPU, don't worry, you can use it through Google Colab. | 63 | To run this project, you need a flask-based inference server (GPU) and a client (commit module). If you don't have a GPU, don't worry, you can use it through Google Colab. |
... | @@ -68,9 +66,18 @@ To run this project, you need a flask-based inference server (GPU) and a client | ... | @@ -68,9 +66,18 @@ To run this project, you need a flask-based inference server (GPU) and a client |
68 | Prepare Docker and Nvidia-docker before running the server. | 66 | Prepare Docker and Nvidia-docker before running the server. |
69 | 67 | ||
70 | ##### 1-a. If you have GPU machine. | 68 | ##### 1-a. If you have GPU machine. |
71 | -Serve flask server with Nvidia Docker | 69 | +Serve flask server with Nvidia Docker. Check the Docker tag for each programming language [here](https://hub.docker.com/repository/registry-1.docker.io/graykode/commit-autosuggestions/tags). |
70 | +| Language | Tag | | ||
71 | +| :------------- | :---: | | ||
72 | +| Python | py | | ||
73 | +| JavaScript | js | | ||
74 | +| Go | go | | ||
75 | +| JAVA | java | | ||
76 | +| Ruby | ruby | | ||
77 | +| PHP | php | | ||
78 | + | ||
72 | ```shell script | 79 | ```shell script |
73 | -$ docker run -it --gpus 0 -p 5000:5000 commit-autosuggestions:0.1-gpu | 80 | +$ docker run -it -d --gpus 0 -p 5000:5000 graykode/commit-autosuggestions:{language} |
74 | ``` | 81 | ``` |
75 | 82 | ||
76 | ##### 1-b. If you don't have GPU machine. | 83 | ##### 1-b. If you don't have GPU machine. | ... | ... |
... | @@ -146,7 +146,7 @@ def main(args): | ... | @@ -146,7 +146,7 @@ def main(args): |
146 | 146 | ||
147 | if __name__ == '__main__': | 147 | if __name__ == '__main__': |
148 | parser = argparse.ArgumentParser(description="") | 148 | parser = argparse.ArgumentParser(description="") |
149 | - parser.add_argument("--load_model_path", default='weight', type=str, | 149 | + parser.add_argument("--load_model_path", type=str, required=True, |
150 | help="Path to trained model: Should contain the .bin files") | 150 | help="Path to trained model: Should contain the .bin files") |
151 | 151 | ||
152 | parser.add_argument("--model_type", default='roberta', type=str, | 152 | parser.add_argument("--model_type", default='roberta', type=str, | ... | ... |
change_logs/v0.1.0.md
0 → 100644
1 | +# Change Log | ||
2 | +version : v0.1.0 | ||
3 | + | ||
4 | +## change things | ||
5 | + | ||
6 | +### Bug Fixes | ||
7 | +- Modify the weight path in the Dockerfile. | ||
8 | + | ||
9 | +### New Features | ||
10 | +- JavaScript Language Support. | ||
11 | +- Detach multiple settings (Unittest, Dockerfile) for Language support. | ||
12 | + | ||
13 | +### New Examples | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
1 | { | 1 | { |
2 | - "nbformat": 4, | 2 | + "nbformat": 4, |
3 | - "nbformat_minor": 0, | 3 | + "nbformat_minor": 0, |
4 | - "metadata": { | 4 | + "metadata": { |
5 | - "colab": { | 5 | + "colab": { |
6 | - "name": "commit-autosuggestions.ipynb", | 6 | + "name": "commit-autosuggestions.ipynb", |
7 | - "provenance": [], | 7 | + "provenance": [], |
8 | - "collapsed_sections": [], | 8 | + "collapsed_sections": [], |
9 | - "toc_visible": true | 9 | + "toc_visible": true |
10 | - }, | ||
11 | - "kernelspec": { | ||
12 | - "name": "python3", | ||
13 | - "display_name": "Python 3" | ||
14 | - }, | ||
15 | - "accelerator": "GPU" | ||
16 | }, | 10 | }, |
17 | - "cells": [ | 11 | + "kernelspec": { |
18 | - { | 12 | + "name": "python3", |
19 | - "cell_type": "markdown", | 13 | + "display_name": "Python 3" |
20 | - "metadata": { | 14 | + }, |
21 | - "id": "DZ7rFp2gzuNS" | 15 | + "accelerator": "GPU" |
22 | - }, | 16 | + }, |
23 | - "source": [ | 17 | + "cells": [ |
24 | - "## Start commit-autosuggestions server\n", | 18 | + { |
25 | - "Running flask app server in Google Colab for people without GPU" | 19 | + "cell_type": "markdown", |
26 | - ] | 20 | + "metadata": { |
27 | - }, | 21 | + "id": "DZ7rFp2gzuNS" |
28 | - { | 22 | + }, |
29 | - "cell_type": "markdown", | 23 | + "source": [ |
30 | - "metadata": { | 24 | + "## Start commit-autosuggestions server\n", |
31 | - "id": "d8Lyin2I3wHq" | 25 | + "Running flask app server in Google Colab for people without GPU" |
32 | - }, | 26 | + ] |
33 | - "source": [ | 27 | + }, |
34 | - "#### Clone github repository" | 28 | + { |
35 | - ] | 29 | + "cell_type": "markdown", |
36 | - }, | 30 | + "metadata": { |
37 | - { | 31 | + "id": "d8Lyin2I3wHq" |
38 | - "cell_type": "code", | 32 | + }, |
39 | - "metadata": { | 33 | + "source": [ |
40 | - "id": "e_cu9igvzjcs" | 34 | + "#### Clone github repository" |
41 | - }, | 35 | + ] |
42 | - "source": [ | 36 | + }, |
43 | - "!git clone https://github.com/graykode/commit-autosuggestions.git\n", | 37 | + { |
44 | - "%cd commit-autosuggestions\n", | 38 | + "cell_type": "code", |
45 | - "!pip install -r requirements.txt" | 39 | + "metadata": { |
46 | - ], | 40 | + "id": "e_cu9igvzjcs" |
47 | - "execution_count": null, | 41 | + }, |
48 | - "outputs": [] | 42 | + "source": [ |
49 | - }, | 43 | + "!git clone https://github.com/graykode/commit-autosuggestions.git\n", |
50 | - { | 44 | + "%cd commit-autosuggestions\n", |
51 | - "cell_type": "markdown", | 45 | + "!pip install -r requirements.txt" |
52 | - "metadata": { | 46 | + ], |
53 | - "id": "PFKn5QZr0dQx" | 47 | + "execution_count": null, |
54 | - }, | 48 | + "outputs": [] |
55 | - "source": [ | 49 | + }, |
56 | - "#### Download model weights\n", | 50 | + { |
57 | - "\n", | 51 | + "cell_type": "markdown", |
58 | - "Download the two weights of model from the google drive through the gdown module.\n", | 52 | + "metadata": { |
59 | - "1. [Added model](https://drive.google.com/uc?id=1YrkwfM-0VBCJaa9NYaXUQPODdGPsmQY4) : A model trained Code2NL on Python using pre-trained CodeBERT (Feng at al, 2020).\n", | 53 | + "id": "PFKn5QZr0dQx" |
60 | - "2. [Diff model](https://drive.google.com/uc?id=1--gcVVix92_Fp75A-mWH0pJS0ahlni5m) : A model retrained by initializing with the weight of model (1), adding embedding of the added and deleted parts(`patch_ids_embedding`) of the code." | 54 | + }, |
61 | - ] | 55 | + "source": [ |
62 | - }, | 56 | + "#### Download model weights\n", |
63 | - { | 57 | + "\n", |
64 | - "cell_type": "code", | 58 | + "Download the two weights of model from the google drive through the gdown module.\n", |
65 | - "metadata": { | 59 | + "1. Added model : A model trained Code2NL on Python using pre-trained CodeBERT (Feng et al., 2020).\n", |
66 | - "id": "P9-EBpxt0Dp0" | 60 | + "2. Diff model : A model retrained by initializing with the weight of model (1), adding embedding of the added and deleted parts(`patch_ids_embedding`) of the code.\n", |
67 | - }, | 61 | + "\n", |
68 | - "source": [ | 62 | + "Download pre-trained weight\n", |
69 | - "!pip install gdown \\\n", | 63 | + "\n", |
70 | - " && gdown \"https://drive.google.com/uc?id=1YrkwfM-0VBCJaa9NYaXUQPODdGPsmQY4\" -O weight/added/pytorch_model.bin \\\n", | 64 | + "Language | Added | Diff\n", |
71 | - " && gdown \"https://drive.google.com/uc?id=1--gcVVix92_Fp75A-mWH0pJS0ahlni5m\" -O weight/diff/pytorch_model.bin" | 65 | + "--- | --- | ---\n", |
72 | - ], | 66 | + "python | 1YrkwfM-0VBCJaa9NYaXUQPODdGPsmQY4 | 1--gcVVix92_Fp75A-mWH0pJS0ahlni5m\n", |
73 | - "execution_count": null, | 67 | + "javascript | 1-F68ymKxZ-htCzQ8_Y9iHexs2SJmP5Gc | 1-39rmu-3clwebNURMQGMt-oM4HsAkbsf" |
74 | - "outputs": [] | 68 | + ] |
75 | - }, | 69 | + }, |
76 | - { | 70 | + { |
77 | - "cell_type": "markdown", | 71 | + "cell_type": "code", |
78 | - "metadata": { | 72 | + "metadata": { |
79 | - "id": "org4Gqdv3iUu" | 73 | + "id": "P9-EBpxt0Dp0" |
80 | - }, | 74 | + }, |
81 | - "source": [ | 75 | + "source": [ |
82 | - "#### ngrok setting with flask\n", | 76 | + "ADD_MODEL='1YrkwfM-0VBCJaa9NYaXUQPODdGPsmQY4'\n", |
83 | - "\n", | 77 | + "DIFF_MODEL='1--gcVVix92_Fp75A-mWH0pJS0ahlni5m'\n", |
84 | - "Before starting the server, you need to configure ngrok to open this notebook to the outside. I have referred [this jupyter notebook](https://github.com/alievk/avatarify/blob/master/avatarify.ipynb) in detail." | 78 | + "\n", |
85 | - ] | 79 | + "!pip install gdown \\\n", |
86 | - }, | 80 | + " && gdown \"https://drive.google.com/uc?id=$ADD_MODEL\" -O weight/added/pytorch_model.bin \\\n", |
87 | - { | 81 | + " && gdown \"https://drive.google.com/uc?id=$DIFF_MODEL\" -O weight/diff/pytorch_model.bin" |
88 | - "cell_type": "code", | 82 | + ], |
89 | - "metadata": { | 83 | + "execution_count": null, |
90 | - "id": "lZA3kuuG1Crj" | 84 | + "outputs": [] |
91 | - }, | 85 | + }, |
92 | - "source": [ | 86 | + { |
93 | - "!pip install flask-ngrok" | 87 | + "cell_type": "markdown", |
94 | - ], | 88 | + "metadata": { |
95 | - "execution_count": null, | 89 | + "id": "org4Gqdv3iUu" |
96 | - "outputs": [] | 90 | + }, |
97 | - }, | 91 | + "source": [ |
98 | - { | 92 | + "#### ngrok setting with flask\n", |
99 | - "cell_type": "markdown", | 93 | + "\n", |
100 | - "metadata": { | 94 | + "Before starting the server, you need to configure ngrok to open this notebook to the outside. I have referred [this jupyter notebook](https://github.com/alievk/avatarify/blob/master/avatarify.ipynb) in detail." |
101 | - "id": "hR78FRCMcqrZ" | 95 | + ] |
102 | - }, | 96 | + }, |
103 | - "source": [ | 97 | + { |
104 | - "Go to https://dashboard.ngrok.com/auth/your-authtoken (sign up if required), copy your authtoken and put it below.\n", | 98 | + "cell_type": "code", |
105 | - "\n" | 99 | + "metadata": { |
106 | - ] | 100 | + "id": "lZA3kuuG1Crj" |
107 | - }, | 101 | + }, |
108 | - { | 102 | + "source": [ |
109 | - "cell_type": "code", | 103 | + "!pip install flask-ngrok" |
110 | - "metadata": { | 104 | + ], |
111 | - "id": "L_mInbOKcoc2" | 105 | + "execution_count": null, |
112 | - }, | 106 | + "outputs": [] |
113 | - "source": [ | 107 | + }, |
114 | - "# Paste your authtoken here in quotes\n", | 108 | + { |
115 | - "authtoken = \"21KfrFEW1BptdPPM4SS_7s1Z4HwozyXX9NP2fHC12\"" | 109 | + "cell_type": "markdown", |
116 | - ], | 110 | + "metadata": { |
117 | - "execution_count": null, | 111 | + "id": "hR78FRCMcqrZ" |
118 | - "outputs": [] | 112 | + }, |
119 | - }, | 113 | + "source": [ |
120 | - { | 114 | + "Go to https://dashboard.ngrok.com/auth/your-authtoken (sign up if required), copy your authtoken and put it below.\n", |
121 | - "cell_type": "markdown", | 115 | + "\n" |
122 | - "metadata": { | 116 | + ] |
123 | - "id": "QwCN4YFUc0M8" | 117 | + }, |
124 | - }, | 118 | + { |
125 | - "source": [ | 119 | + "cell_type": "code", |
126 | - "Set your region\n", | 120 | + "metadata": { |
127 | - "\n", | 121 | + "id": "L_mInbOKcoc2" |
128 | - "Code | Region\n", | 122 | + }, |
129 | - "--- | ---\n", | 123 | + "source": [ |
130 | - "us | United States\n", | 124 | + "# Paste your authtoken here in quotes\n", |
131 | - "eu | Europe\n", | 125 | + "authtoken = \"21KfrFEW1BptdPPM4SS_7s1Z4HwozyXX9NP2fHC12\"" |
132 | - "ap | Asia/Pacific\n", | 126 | + ], |
133 | - "au | Australia\n", | 127 | + "execution_count": null, |
134 | - "sa | South America\n", | 128 | + "outputs": [] |
135 | - "jp | Japan\n", | 129 | + }, |
136 | - "in | India" | 130 | + { |
137 | - ] | 131 | + "cell_type": "markdown", |
138 | - }, | 132 | + "metadata": { |
139 | - { | 133 | + "id": "QwCN4YFUc0M8" |
140 | - "cell_type": "code", | 134 | + }, |
141 | - "metadata": { | 135 | + "source": [ |
142 | - "id": "p4LSNN2xc0dQ" | 136 | + "Set your region\n", |
143 | - }, | 137 | + "\n", |
144 | - "source": [ | 138 | + "Code | Region\n", |
145 | - "# Set your region here in quotes\n", | 139 | + "--- | ---\n", |
146 | - "region = \"jp\"\n", | 140 | + "us | United States\n", |
147 | - "\n", | 141 | + "eu | Europe\n", |
148 | - "# Input and output ports for communication\n", | 142 | + "ap | Asia/Pacific\n", |
149 | - "local_in_port = 5000\n", | 143 | + "au | Australia\n", |
150 | - "local_out_port = 5000" | 144 | + "sa | South America\n", |
151 | - ], | 145 | + "jp | Japan\n", |
152 | - "execution_count": null, | 146 | + "in | India" |
153 | - "outputs": [] | 147 | + ] |
154 | - }, | 148 | + }, |
155 | - { | 149 | + { |
156 | - "cell_type": "code", | 150 | + "cell_type": "code", |
157 | - "metadata": { | 151 | + "metadata": { |
158 | - "id": "kg56PVrOdhi1" | 152 | + "id": "p4LSNN2xc0dQ" |
159 | - }, | 153 | + }, |
160 | - "source": [ | 154 | + "source": [ |
161 | - "config =\\\n", | 155 | + "# Set your region here in quotes\n", |
162 | - "f\"\"\"\n", | 156 | + "region = \"jp\"\n", |
163 | - "authtoken: {authtoken}\n", | 157 | + "\n", |
164 | - "region: {region}\n", | 158 | + "# Input and output ports for communication\n", |
165 | - "console_ui: False\n", | 159 | + "local_in_port = 5000\n", |
166 | - "tunnels:\n", | 160 | + "local_out_port = 5000" |
167 | - " input:\n", | 161 | + ], |
168 | - " addr: {local_in_port}\n", | 162 | + "execution_count": null, |
169 | - " proto: http \n", | 163 | + "outputs": [] |
170 | - " output:\n", | 164 | + }, |
171 | - " addr: {local_out_port}\n", | 165 | + { |
172 | - " proto: http\n", | 166 | + "cell_type": "code", |
173 | - "\"\"\"\n", | 167 | + "metadata": { |
174 | - "\n", | 168 | + "id": "kg56PVrOdhi1" |
175 | - "with open('ngrok.conf', 'w') as f:\n", | 169 | + }, |
176 | - " f.write(config)" | 170 | + "source": [ |
177 | - ], | 171 | + "config =\\\n", |
178 | - "execution_count": null, | 172 | + "f\"\"\"\n", |
179 | - "outputs": [] | 173 | + "authtoken: {authtoken}\n", |
180 | - }, | 174 | + "region: {region}\n", |
181 | - { | 175 | + "console_ui: False\n", |
182 | - "cell_type": "code", | 176 | + "tunnels:\n", |
183 | - "metadata": { | 177 | + " input:\n", |
184 | - "id": "hrWDrw_YdjIy" | 178 | + " addr: {local_in_port}\n", |
185 | - }, | 179 | + " proto: http \n", |
186 | - "source": [ | 180 | + " output:\n", |
187 | - "import time\n", | 181 | + " addr: {local_out_port}\n", |
188 | - "from subprocess import Popen, PIPE\n", | 182 | + " proto: http\n", |
189 | - "\n", | 183 | + "\"\"\"\n", |
190 | - "# (Re)Open tunnel\n", | 184 | + "\n", |
191 | - "ps = Popen('./scripts/open_tunnel_ngrok.sh', stdout=PIPE, stderr=PIPE)\n", | 185 | + "with open('ngrok.conf', 'w') as f:\n", |
192 | - "time.sleep(3)" | 186 | + " f.write(config)" |
193 | - ], | 187 | + ], |
194 | - "execution_count": null, | 188 | + "execution_count": null, |
195 | - "outputs": [] | 189 | + "outputs": [] |
196 | - }, | 190 | + }, |
197 | - { | 191 | + { |
198 | - "cell_type": "code", | 192 | + "cell_type": "code", |
199 | - "metadata": { | 193 | + "metadata": { |
200 | - "id": "pJgdFr0Fdjoq", | 194 | + "id": "hrWDrw_YdjIy" |
201 | - "outputId": "3948f70b-d4f3-4ed8-a864-fe5c6df50809", | 195 | + }, |
202 | - "colab": { | 196 | + "source": [ |
203 | - "base_uri": "https://localhost:8080/" | 197 | + "import time\n", |
204 | - } | 198 | + "from subprocess import Popen, PIPE\n", |
205 | - }, | 199 | + "\n", |
206 | - "source": [ | 200 | + "# (Re)Open tunnel\n", |
207 | - "# Get tunnel addresses\n", | 201 | + "ps = Popen('./scripts/open_tunnel_ngrok.sh', stdout=PIPE, stderr=PIPE)\n", |
208 | - "try:\n", | 202 | + "time.sleep(3)" |
209 | - " in_addr, out_addr = get_tunnel_adresses()\n", | 203 | + ], |
210 | - " print(\"Tunnel opened\")\n", | 204 | + "execution_count": null, |
211 | - "except Exception as e:\n", | 205 | + "outputs": [] |
212 | - " [print(l.decode(), end='') for l in ps.stdout.readlines()]\n", | 206 | + }, |
213 | - " print(\"Something went wrong, reopen the tunnel\")" | 207 | + { |
214 | - ], | 208 | + "cell_type": "code", |
215 | - "execution_count": null, | 209 | + "metadata": { |
216 | - "outputs": [ | 210 | + "id": "pJgdFr0Fdjoq", |
217 | - { | 211 | + "outputId": "3948f70b-d4f3-4ed8-a864-fe5c6df50809", |
218 | - "output_type": "stream", | 212 | + "colab": { |
219 | - "text": [ | 213 | + "base_uri": "https://localhost:8080/" |
220 | - "Opening tunnel\n", | 214 | + } |
221 | - "Something went wrong, reopen the tunnel\n" | 215 | + }, |
222 | - ], | 216 | + "source": [ |
223 | - "name": "stdout" | 217 | + "# Get tunnel addresses\n", |
224 | - } | 218 | + "try:\n", |
225 | - ] | 219 | + " in_addr, out_addr = get_tunnel_adresses()\n", |
226 | - }, | 220 | + " print(\"Tunnel opened\")\n", |
227 | - { | 221 | + "except Exception as e:\n", |
228 | - "cell_type": "markdown", | 222 | + " [print(l.decode(), end='') for l in ps.stdout.readlines()]\n", |
229 | - "metadata": { | 223 | + " print(\"Something went wrong, reopen the tunnel\")" |
230 | - "id": "cEZ-O0wz74OJ" | 224 | + ], |
231 | - }, | 225 | + "execution_count": null, |
232 | - "source": [ | 226 | + "outputs": [ |
233 | - "#### Run you server!" | ||
234 | - ] | ||
235 | - }, | ||
236 | - { | ||
237 | - "cell_type": "code", | ||
238 | - "metadata": { | ||
239 | - "id": "7PRkeYTL8Y_6" | ||
240 | - }, | ||
241 | - "source": [ | ||
242 | - "import os\n", | ||
243 | - "import torch\n", | ||
244 | - "import argparse\n", | ||
245 | - "from tqdm import tqdm\n", | ||
246 | - "import torch.nn as nn\n", | ||
247 | - "from torch.utils.data import TensorDataset, DataLoader, SequentialSampler\n", | ||
248 | - "from transformers import (RobertaConfig, RobertaTokenizer)\n", | ||
249 | - "\n", | ||
250 | - "from commit.model import Seq2Seq\n", | ||
251 | - "from commit.utils import (Example, convert_examples_to_features)\n", | ||
252 | - "from commit.model.diff_roberta import RobertaModel\n", | ||
253 | - "\n", | ||
254 | - "from flask import Flask, jsonify, request\n", | ||
255 | - "\n", | ||
256 | - "MODEL_CLASSES = {'roberta': (RobertaConfig, RobertaModel, RobertaTokenizer)}" | ||
257 | - ], | ||
258 | - "execution_count": null, | ||
259 | - "outputs": [] | ||
260 | - }, | ||
261 | - { | ||
262 | - "cell_type": "code", | ||
263 | - "metadata": { | ||
264 | - "id": "CiJKucX17qb4" | ||
265 | - }, | ||
266 | - "source": [ | ||
267 | - "def get_model(model_class, config, tokenizer, mode):\n", | ||
268 | - " encoder = model_class(config=config)\n", | ||
269 | - " decoder_layer = nn.TransformerDecoderLayer(\n", | ||
270 | - " d_model=config.hidden_size, nhead=config.num_attention_heads\n", | ||
271 | - " )\n", | ||
272 | - " decoder = nn.TransformerDecoder(decoder_layer, num_layers=6)\n", | ||
273 | - " model = Seq2Seq(encoder=encoder, decoder=decoder, config=config,\n", | ||
274 | - " beam_size=args.beam_size, max_length=args.max_target_length,\n", | ||
275 | - " sos_id=tokenizer.cls_token_id, eos_id=tokenizer.sep_token_id)\n", | ||
276 | - "\n", | ||
277 | - " assert args.load_model_path\n", | ||
278 | - " assert os.path.exists(os.path.join(args.load_model_path, mode, 'pytorch_model.bin'))\n", | ||
279 | - "\n", | ||
280 | - " model.load_state_dict(\n", | ||
281 | - " torch.load(\n", | ||
282 | - " os.path.join(args.load_model_path, mode, 'pytorch_model.bin'),\n", | ||
283 | - " map_location=torch.device('cpu')\n", | ||
284 | - " ),\n", | ||
285 | - " strict=False\n", | ||
286 | - " )\n", | ||
287 | - " return model\n", | ||
288 | - "\n", | ||
289 | - "def get_features(examples):\n", | ||
290 | - " features = convert_examples_to_features(examples, args.tokenizer, args, stage='test')\n", | ||
291 | - " all_source_ids = torch.tensor(\n", | ||
292 | - " [f.source_ids[:args.max_source_length] for f in features], dtype=torch.long\n", | ||
293 | - " )\n", | ||
294 | - " all_source_mask = torch.tensor(\n", | ||
295 | - " [f.source_mask[:args.max_source_length] for f in features], dtype=torch.long\n", | ||
296 | - " )\n", | ||
297 | - " all_patch_ids = torch.tensor(\n", | ||
298 | - " [f.patch_ids[:args.max_source_length] for f in features], dtype=torch.long\n", | ||
299 | - " )\n", | ||
300 | - " return TensorDataset(all_source_ids, all_source_mask, all_patch_ids)\n", | ||
301 | - "\n", | ||
302 | - "def create_app():\n", | ||
303 | - " @app.route('/')\n", | ||
304 | - " def index():\n", | ||
305 | - " return jsonify(hello=\"world\")\n", | ||
306 | - "\n", | ||
307 | - " @app.route('/added', methods=['POST'])\n", | ||
308 | - " def added():\n", | ||
309 | - " if request.method == 'POST':\n", | ||
310 | - " payload = request.get_json()\n", | ||
311 | - " example = [\n", | ||
312 | - " Example(\n", | ||
313 | - " idx=payload['idx'],\n", | ||
314 | - " added=payload['added'],\n", | ||
315 | - " deleted=payload['deleted'],\n", | ||
316 | - " target=None\n", | ||
317 | - " )\n", | ||
318 | - " ]\n", | ||
319 | - " message = inference(model=args.added_model, data=get_features(example))\n", | ||
320 | - " return jsonify(idx=payload['idx'], message=message)\n", | ||
321 | - "\n", | ||
322 | - " @app.route('/diff', methods=['POST'])\n", | ||
323 | - " def diff():\n", | ||
324 | - " if request.method == 'POST':\n", | ||
325 | - " payload = request.get_json()\n", | ||
326 | - " example = [\n", | ||
327 | - " Example(\n", | ||
328 | - " idx=payload['idx'],\n", | ||
329 | - " added=payload['added'],\n", | ||
330 | - " deleted=payload['deleted'],\n", | ||
331 | - " target=None\n", | ||
332 | - " )\n", | ||
333 | - " ]\n", | ||
334 | - " message = inference(model=args.diff_model, data=get_features(example))\n", | ||
335 | - " return jsonify(idx=payload['idx'], message=message)\n", | ||
336 | - "\n", | ||
337 | - " @app.route('/tokenizer', methods=['POST'])\n", | ||
338 | - " def tokenizer():\n", | ||
339 | - " if request.method == 'POST':\n", | ||
340 | - " payload = request.get_json()\n", | ||
341 | - " tokens = args.tokenizer.tokenize(payload['code'])\n", | ||
342 | - " return jsonify(tokens=tokens)\n", | ||
343 | - "\n", | ||
344 | - " return app\n", | ||
345 | - "\n", | ||
346 | - "def inference(model, data):\n", | ||
347 | - " # Calculate bleu\n", | ||
348 | - " eval_sampler = SequentialSampler(data)\n", | ||
349 | - " eval_dataloader = DataLoader(data, sampler=eval_sampler, batch_size=len(data))\n", | ||
350 | - "\n", | ||
351 | - " model.eval()\n", | ||
352 | - " p=[]\n", | ||
353 | - " for batch in tqdm(eval_dataloader, total=len(eval_dataloader)):\n", | ||
354 | - " batch = tuple(t.to(args.device) for t in batch)\n", | ||
355 | - " source_ids, source_mask, patch_ids = batch\n", | ||
356 | - " with torch.no_grad():\n", | ||
357 | - " preds = model(source_ids=source_ids, source_mask=source_mask, patch_ids=patch_ids)\n", | ||
358 | - " for pred in preds:\n", | ||
359 | - " t = pred[0].cpu().numpy()\n", | ||
360 | - " t = list(t)\n", | ||
361 | - " if 0 in t:\n", | ||
362 | - " t = t[:t.index(0)]\n", | ||
363 | - " text = args.tokenizer.decode(t, clean_up_tokenization_spaces=False)\n", | ||
364 | - " p.append(text)\n", | ||
365 | - " return p" | ||
366 | - ], | ||
367 | - "execution_count": null, | ||
368 | - "outputs": [] | ||
369 | - }, | ||
370 | - { | ||
371 | - "cell_type": "markdown", | ||
372 | - "metadata": { | ||
373 | - "id": "Esf4r-Ai8cG3" | ||
374 | - }, | ||
375 | - "source": [ | ||
376 | - "**Set enviroment**" | ||
377 | - ] | ||
378 | - }, | ||
379 | - { | ||
380 | - "cell_type": "code", | ||
381 | - "metadata": { | ||
382 | - "id": "mR7gVmSoSUoy" | ||
383 | - }, | ||
384 | - "source": [ | ||
385 | - "import easydict \n", | ||
386 | - "\n", | ||
387 | - "args = easydict.EasyDict({\n", | ||
388 | - " 'load_model_path': 'weight/', \n", | ||
389 | - " 'model_type': 'roberta',\n", | ||
390 | - " 'config_name' : 'microsoft/codebert-base',\n", | ||
391 | - " 'tokenizer_name' : 'microsoft/codebert-base',\n", | ||
392 | - " 'max_source_length' : 512,\n", | ||
393 | - " 'max_target_length' : 128,\n", | ||
394 | - " 'beam_size' : 10,\n", | ||
395 | - " 'do_lower_case' : False,\n", | ||
396 | - " 'device' : torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", | ||
397 | - "})" | ||
398 | - ], | ||
399 | - "execution_count": null, | ||
400 | - "outputs": [] | ||
401 | - }, | ||
402 | - { | ||
403 | - "cell_type": "code", | ||
404 | - "metadata": { | ||
405 | - "id": "e8dk5RwvToOv" | ||
406 | - }, | ||
407 | - "source": [ | ||
408 | - "# flask_ngrok_example.py\n", | ||
409 | - "from flask_ngrok import run_with_ngrok\n", | ||
410 | - "\n", | ||
411 | - "app = Flask(__name__)\n", | ||
412 | - "run_with_ngrok(app) # Start ngrok when app is run\n", | ||
413 | - "\n", | ||
414 | - "config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]\n", | ||
415 | - "config = config_class.from_pretrained(args.config_name)\n", | ||
416 | - "args.tokenizer = tokenizer_class.from_pretrained(args.tokenizer_name, do_lower_case=args.do_lower_case)\n", | ||
417 | - "\n", | ||
418 | - "# budild model\n", | ||
419 | - "args.added_model =get_model(model_class=model_class, config=config,\n", | ||
420 | - " tokenizer=args.tokenizer, mode='added').to(args.device)\n", | ||
421 | - "args.diff_model = get_model(model_class=model_class, config=config,\n", | ||
422 | - " tokenizer=args.tokenizer, mode='diff').to(args.device)\n", | ||
423 | - "\n", | ||
424 | - "app = create_app()\n", | ||
425 | - "app.run()" | ||
426 | - ], | ||
427 | - "execution_count": null, | ||
428 | - "outputs": [] | ||
429 | - }, | ||
430 | { | 227 | { |
431 | - "cell_type": "markdown", | 228 | + "output_type": "stream", |
432 | - "metadata": { | 229 | + "text": [ |
433 | - "id": "DXkBcO_sU_VN" | 230 | + "Opening tunnel\n", |
434 | - }, | 231 | + "Something went wrong, reopen the tunnel\n" |
435 | - "source": [ | 232 | + ], |
436 | - "## Set commit configure\n", | 233 | + "name": "stdout" |
437 | - "Now, set commit configure on your local computer.\n", | ||
438 | - "```shell\n", | ||
439 | - "$ commit configure --endpoint http://********.ngrok.io\n", | ||
440 | - "```" | ||
441 | - ] | ||
442 | } | 234 | } |
443 | - ] | 235 | + ] |
236 | + }, | ||
237 | + { | ||
238 | + "cell_type": "markdown", | ||
239 | + "metadata": { | ||
240 | + "id": "cEZ-O0wz74OJ" | ||
241 | + }, | ||
242 | + "source": [ | ||
243 | + "#### Run your server!" | ||
244 | + ] | ||
245 | + }, | ||
246 | + { | ||
247 | + "cell_type": "code", | ||
248 | + "metadata": { | ||
249 | + "id": "7PRkeYTL8Y_6" | ||
250 | + }, | ||
251 | + "source": [ | ||
252 | + "import os\n", | ||
253 | + "import torch\n", | ||
254 | + "import argparse\n", | ||
255 | + "from tqdm import tqdm\n", | ||
256 | + "import torch.nn as nn\n", | ||
257 | + "from torch.utils.data import TensorDataset, DataLoader, SequentialSampler\n", | ||
258 | + "from transformers import (RobertaConfig, RobertaTokenizer)\n", | ||
259 | + "\n", | ||
260 | + "from commit.model import Seq2Seq\n", | ||
261 | + "from commit.utils import (Example, convert_examples_to_features)\n", | ||
262 | + "from commit.model.diff_roberta import RobertaModel\n", | ||
263 | + "\n", | ||
264 | + "from flask import Flask, jsonify, request\n", | ||
265 | + "\n", | ||
266 | + "MODEL_CLASSES = {'roberta': (RobertaConfig, RobertaModel, RobertaTokenizer)}" | ||
267 | + ], | ||
268 | + "execution_count": null, | ||
269 | + "outputs": [] | ||
270 | + }, | ||
271 | + { | ||
272 | + "cell_type": "code", | ||
273 | + "metadata": { | ||
274 | + "id": "CiJKucX17qb4" | ||
275 | + }, | ||
276 | + "source": [ | ||
277 | + "def get_model(model_class, config, tokenizer, mode):\n", | ||
278 | + " encoder = model_class(config=config)\n", | ||
279 | + " decoder_layer = nn.TransformerDecoderLayer(\n", | ||
280 | + " d_model=config.hidden_size, nhead=config.num_attention_heads\n", | ||
281 | + " )\n", | ||
282 | + " decoder = nn.TransformerDecoder(decoder_layer, num_layers=6)\n", | ||
283 | + " model = Seq2Seq(encoder=encoder, decoder=decoder, config=config,\n", | ||
284 | + " beam_size=args.beam_size, max_length=args.max_target_length,\n", | ||
285 | + " sos_id=tokenizer.cls_token_id, eos_id=tokenizer.sep_token_id)\n", | ||
286 | + "\n", | ||
287 | + " assert args.load_model_path\n", | ||
288 | + " assert os.path.exists(os.path.join(args.load_model_path, mode, 'pytorch_model.bin'))\n", | ||
289 | + "\n", | ||
290 | + " model.load_state_dict(\n", | ||
291 | + " torch.load(\n", | ||
292 | + " os.path.join(args.load_model_path, mode, 'pytorch_model.bin'),\n", | ||
293 | + " map_location=torch.device('cpu')\n", | ||
294 | + " ),\n", | ||
295 | + " strict=False\n", | ||
296 | + " )\n", | ||
297 | + " return model\n", | ||
298 | + "\n", | ||
299 | + "def get_features(examples):\n", | ||
300 | + " features = convert_examples_to_features(examples, args.tokenizer, args, stage='test')\n", | ||
301 | + " all_source_ids = torch.tensor(\n", | ||
302 | + " [f.source_ids[:args.max_source_length] for f in features], dtype=torch.long\n", | ||
303 | + " )\n", | ||
304 | + " all_source_mask = torch.tensor(\n", | ||
305 | + " [f.source_mask[:args.max_source_length] for f in features], dtype=torch.long\n", | ||
306 | + " )\n", | ||
307 | + " all_patch_ids = torch.tensor(\n", | ||
308 | + " [f.patch_ids[:args.max_source_length] for f in features], dtype=torch.long\n", | ||
309 | + " )\n", | ||
310 | + " return TensorDataset(all_source_ids, all_source_mask, all_patch_ids)\n", | ||
311 | + "\n", | ||
312 | + "def create_app():\n", | ||
313 | + " @app.route('/')\n", | ||
314 | + " def index():\n", | ||
315 | + " return jsonify(hello=\"world\")\n", | ||
316 | + "\n", | ||
317 | + " @app.route('/added', methods=['POST'])\n", | ||
318 | + " def added():\n", | ||
319 | + " if request.method == 'POST':\n", | ||
320 | + " payload = request.get_json()\n", | ||
321 | + " example = [\n", | ||
322 | + " Example(\n", | ||
323 | + " idx=payload['idx'],\n", | ||
324 | + " added=payload['added'],\n", | ||
325 | + " deleted=payload['deleted'],\n", | ||
326 | + " target=None\n", | ||
327 | + " )\n", | ||
328 | + " ]\n", | ||
329 | + " message = inference(model=args.added_model, data=get_features(example))\n", | ||
330 | + " return jsonify(idx=payload['idx'], message=message)\n", | ||
331 | + "\n", | ||
332 | + " @app.route('/diff', methods=['POST'])\n", | ||
333 | + " def diff():\n", | ||
334 | + " if request.method == 'POST':\n", | ||
335 | + " payload = request.get_json()\n", | ||
336 | + " example = [\n", | ||
337 | + " Example(\n", | ||
338 | + " idx=payload['idx'],\n", | ||
339 | + " added=payload['added'],\n", | ||
340 | + " deleted=payload['deleted'],\n", | ||
341 | + " target=None\n", | ||
342 | + " )\n", | ||
343 | + " ]\n", | ||
344 | + " message = inference(model=args.diff_model, data=get_features(example))\n", | ||
345 | + " return jsonify(idx=payload['idx'], message=message)\n", | ||
346 | + "\n", | ||
347 | + " @app.route('/tokenizer', methods=['POST'])\n", | ||
348 | + " def tokenizer():\n", | ||
349 | + " if request.method == 'POST':\n", | ||
350 | + " payload = request.get_json()\n", | ||
351 | + " tokens = args.tokenizer.tokenize(payload['code'])\n", | ||
352 | + " return jsonify(tokens=tokens)\n", | ||
353 | + "\n", | ||
354 | + " return app\n", | ||
355 | + "\n", | ||
356 | + "def inference(model, data):\n", | ||
357 | + " # Calculate bleu\n", | ||
358 | + " eval_sampler = SequentialSampler(data)\n", | ||
359 | + " eval_dataloader = DataLoader(data, sampler=eval_sampler, batch_size=len(data))\n", | ||
360 | + "\n", | ||
361 | + " model.eval()\n", | ||
362 | + " p=[]\n", | ||
363 | + " for batch in tqdm(eval_dataloader, total=len(eval_dataloader)):\n", | ||
364 | + " batch = tuple(t.to(args.device) for t in batch)\n", | ||
365 | + " source_ids, source_mask, patch_ids = batch\n", | ||
366 | + " with torch.no_grad():\n", | ||
367 | + " preds = model(source_ids=source_ids, source_mask=source_mask, patch_ids=patch_ids)\n", | ||
368 | + " for pred in preds:\n", | ||
369 | + " t = pred[0].cpu().numpy()\n", | ||
370 | + " t = list(t)\n", | ||
371 | + " if 0 in t:\n", | ||
372 | + " t = t[:t.index(0)]\n", | ||
373 | + " text = args.tokenizer.decode(t, clean_up_tokenization_spaces=False)\n", | ||
374 | + " p.append(text)\n", | ||
375 | + " return p" | ||
376 | + ], | ||
377 | + "execution_count": null, | ||
378 | + "outputs": [] | ||
379 | + }, | ||
380 | + { | ||
381 | + "cell_type": "markdown", | ||
382 | + "metadata": { | ||
383 | + "id": "Esf4r-Ai8cG3" | ||
384 | + }, | ||
385 | + "source": [ | ||
386 | + "**Set environment**" | ||
387 | + ] | ||
388 | + }, | ||
389 | + { | ||
390 | + "cell_type": "code", | ||
391 | + "metadata": { | ||
392 | + "id": "mR7gVmSoSUoy" | ||
393 | + }, | ||
394 | + "source": [ | ||
395 | + "import easydict \n", | ||
396 | + "\n", | ||
397 | + "args = easydict.EasyDict({\n", | ||
398 | + " 'load_model_path': 'weight/', \n", | ||
399 | + " 'model_type': 'roberta',\n", | ||
400 | + " 'config_name' : 'microsoft/codebert-base',\n", | ||
401 | + " 'tokenizer_name' : 'microsoft/codebert-base',\n", | ||
402 | + " 'max_source_length' : 512,\n", | ||
403 | + " 'max_target_length' : 128,\n", | ||
404 | + " 'beam_size' : 10,\n", | ||
405 | + " 'do_lower_case' : False,\n", | ||
406 | + " 'device' : torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", | ||
407 | + "})" | ||
408 | + ], | ||
409 | + "execution_count": null, | ||
410 | + "outputs": [] | ||
411 | + }, | ||
412 | + { | ||
413 | + "cell_type": "code", | ||
414 | + "metadata": { | ||
415 | + "id": "e8dk5RwvToOv" | ||
416 | + }, | ||
417 | + "source": [ | ||
418 | + "# flask_ngrok_example.py\n", | ||
419 | + "from flask_ngrok import run_with_ngrok\n", | ||
420 | + "\n", | ||
421 | + "app = Flask(__name__)\n", | ||
422 | + "run_with_ngrok(app) # Start ngrok when app is run\n", | ||
423 | + "\n", | ||
424 | + "config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]\n", | ||
425 | + "config = config_class.from_pretrained(args.config_name)\n", | ||
426 | + "args.tokenizer = tokenizer_class.from_pretrained(args.tokenizer_name, do_lower_case=args.do_lower_case)\n", | ||
427 | + "\n", | ||
428 | + "# build model\n", | ||
429 | + "args.added_model =get_model(model_class=model_class, config=config,\n", | ||
430 | + " tokenizer=args.tokenizer, mode='added').to(args.device)\n", | ||
431 | + "args.diff_model = get_model(model_class=model_class, config=config,\n", | ||
432 | + " tokenizer=args.tokenizer, mode='diff').to(args.device)\n", | ||
433 | + "\n", | ||
434 | + "app = create_app()\n", | ||
435 | + "app.run()" | ||
436 | + ], | ||
437 | + "execution_count": null, | ||
438 | + "outputs": [] | ||
439 | + }, | ||
440 | + { | ||
441 | + "cell_type": "markdown", | ||
442 | + "metadata": { | ||
443 | + "id": "DXkBcO_sU_VN" | ||
444 | + }, | ||
445 | + "source": [ | ||
446 | + "## Configure commit\n", | ||
447 | + "Now, configure `commit` on your local computer so that it uses this endpoint.\n", | ||
448 | + "```shell\n", | ||
449 | + "$ commit configure --endpoint http://********.ngrok.io\n", | ||
450 | + "```" | ||
451 | + ] | ||
452 | + } | ||
453 | + ] | ||
444 | } | 454 | } |
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
docker/javascript/Dockerfile
0 → 100644
1 | +FROM nvcr.io/nvidia/cuda:10.0-cudnn7-runtime-ubuntu18.04 | ||
2 | +LABEL maintainer="nlkey2022@gmail.com" | ||
3 | + | ||
4 | +RUN DEBIAN_FRONTEND=noninteractive apt-get -qq update \ | ||
5 | + && DEBIAN_FRONTEND=noninteractive apt-get -qqy install curl python3-pip git \ | ||
6 | + && rm -rf /var/lib/apt/lists/* | ||
7 | + | ||
8 | +ARG PYTORCH_WHEEL="https://download.pytorch.org/whl/cu101/torch-1.6.0%2Bcu101-cp36-cp36m-linux_x86_64.whl" | ||
9 | +ARG ADDED_MODEL="1-F68ymKxZ-htCzQ8_Y9iHexs2SJmP5Gc" | ||
10 | +ARG DIFF_MODEL="1-39rmu-3clwebNURMQGMt-oM4HsAkbsf" | ||
11 | + | ||
12 | +RUN git clone https://github.com/graykode/commit-autosuggestions.git /app/commit-autosuggestions \ | ||
13 | + && cd /app/commit-autosuggestions | ||
14 | + | ||
15 | +WORKDIR /app/commit-autosuggestions | ||
16 | + | ||
17 | +RUN pip3 install ${PYTORCH_WHEEL} gdown | ||
18 | +RUN gdown https://drive.google.com/uc?id=${ADDED_MODEL} -O weight/javascript/added/ | ||
19 | +RUN gdown https://drive.google.com/uc?id=${DIFF_MODEL} -O weight/javascript/diff/ | ||
20 | + | ||
21 | +RUN pip3 install -r requirements.txt | ||
22 | + | ||
23 | +ENTRYPOINT ["python3", "app.py", "--load_model_path", "./weight/javascript/"] |
... | @@ -10,14 +10,14 @@ ARG ADDED_MODEL="1YrkwfM-0VBCJaa9NYaXUQPODdGPsmQY4" | ... | @@ -10,14 +10,14 @@ ARG ADDED_MODEL="1YrkwfM-0VBCJaa9NYaXUQPODdGPsmQY4" |
10 | ARG DIFF_MODEL="1--gcVVix92_Fp75A-mWH0pJS0ahlni5m" | 10 | ARG DIFF_MODEL="1--gcVVix92_Fp75A-mWH0pJS0ahlni5m" |
11 | 11 | ||
12 | RUN git clone https://github.com/graykode/commit-autosuggestions.git /app/commit-autosuggestions \ | 12 | RUN git clone https://github.com/graykode/commit-autosuggestions.git /app/commit-autosuggestions \ |
13 | - && cd /app/commit-autosuggestions && python3 setup.py install | 13 | + && cd /app/commit-autosuggestions |
14 | 14 | ||
15 | WORKDIR /app/commit-autosuggestions | 15 | WORKDIR /app/commit-autosuggestions |
16 | 16 | ||
17 | RUN pip3 install ${PYTORCH_WHEEL} gdown | 17 | RUN pip3 install ${PYTORCH_WHEEL} gdown |
18 | -RUN gdown https://drive.google.com/uc?id=${ADDED_MODEL} -O weight/added/ | 18 | +RUN gdown https://drive.google.com/uc?id=${ADDED_MODEL} -O weight/python/added/ |
19 | -RUN gdown https://drive.google.com/uc?id=${DIFF_MODEL} -O weight/diff/ | 19 | +RUN gdown https://drive.google.com/uc?id=${DIFF_MODEL} -O weight/python/diff/ |
20 | 20 | ||
21 | RUN pip3 install -r requirements.txt | 21 | RUN pip3 install -r requirements.txt |
22 | 22 | ||
23 | -ENTRYPOINT ["python3", "app.py"] | 23 | +ENTRYPOINT ["python3", "app.py", "--load_model_path", "./weight/python/"] | ... | ... |
... | @@ -104,6 +104,8 @@ optional arguments: | ... | @@ -104,6 +104,8 @@ optional arguments: |
104 | The maximum total target sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded. | 104 | The maximum total target sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded. |
105 | ``` | 105 | ``` |
106 | 106 | ||
107 | +> If `UnicodeDecodeError` occurs while using gitparser.py, you must use a version of the [GitPython](https://github.com/gitpython-developers/GitPython) package that includes at least [this commit](https://github.com/gitpython-developers/GitPython/commit/bfbd5ece215dea328c3c6c4cba31225caa66ae9a). | ||
108 | + | ||
107 | #### 3. Training Added model(Optional for Python Language). | 109 | #### 3. Training Added model(Optional for Python Language). |
108 | Python has learned the Added model. So, if you only want to make a Diff model for the Python language, step 3 can be ignored. However, for other languages (JavaScript, GO, Ruby, PHP and JAVA), [Code2NL training](https://github.com/microsoft/CodeBERT#fine-tune-1) is required to use as the initial weight of the model to be used in step 4. | 110 | Python has learned the Added model. So, if you only want to make a Diff model for the Python language, step 3 can be ignored. However, for other languages (JavaScript, GO, Ruby, PHP and JAVA), [Code2NL training](https://github.com/microsoft/CodeBERT#fine-tune-1) is required to use as the initial weight of the model to be used in step 4. |
109 | 111 | ... | ... |
... | @@ -24,6 +24,15 @@ from multiprocessing.pool import Pool | ... | @@ -24,6 +24,15 @@ from multiprocessing.pool import Pool |
24 | from transformers import RobertaTokenizer | 24 | from transformers import RobertaTokenizer |
25 | from pydriller import RepositoryMining | 25 | from pydriller import RepositoryMining |
26 | 26 | ||
27 | +language = { | ||
28 | + 'py' : ['.py'], | ||
29 | + 'js' : ['.js', '.ts'], | ||
30 | + 'go' : ['.go'], | ||
31 | + 'java' : ['.java'], | ||
32 | + 'ruby' : ['.rb'], | ||
33 | + 'php' : ['.php'] | ||
34 | +} | ||
35 | + | ||
27 | def message_cleaner(message): | 36 | def message_cleaner(message): |
28 | msg = message.split("\n")[0] | 37 | msg = message.split("\n")[0] |
29 | msg = re.sub(r"(\(|)#([0-9])+(\)|)", "", msg) | 38 | msg = re.sub(r"(\(|)#([0-9])+(\)|)", "", msg) |
... | @@ -34,7 +43,7 @@ def jobs(repo, args): | ... | @@ -34,7 +43,7 @@ def jobs(repo, args): |
34 | repo_path = os.path.join(args.repos_dir, repo) | 43 | repo_path = os.path.join(args.repos_dir, repo) |
35 | if os.path.exists(repo_path): | 44 | if os.path.exists(repo_path): |
36 | for commit in RepositoryMining( | 45 | for commit in RepositoryMining( |
37 | - repo_path, only_modifications_with_file_types=['.py'] | 46 | + repo_path, only_modifications_with_file_types=language[args.lang] |
38 | ).traverse_commits(): | 47 | ).traverse_commits(): |
39 | cleaned_message = message_cleaner(commit.msg) | 48 | cleaned_message = message_cleaner(commit.msg) |
40 | tokenized_message = args.tokenizer.tokenize(cleaned_message) | 49 | tokenized_message = args.tokenizer.tokenize(cleaned_message) |
... | @@ -44,7 +53,7 @@ def jobs(repo, args): | ... | @@ -44,7 +53,7 @@ def jobs(repo, args): |
44 | for mod in commit.modifications: | 53 | for mod in commit.modifications: |
45 | if not (mod.old_path and mod.new_path): | 54 | if not (mod.old_path and mod.new_path): |
46 | continue | 55 | continue |
47 | - if os.path.splitext(mod.new_path)[1] != '.py': | 56 | + if os.path.splitext(mod.new_path)[1] not in language[args.lang]: |
48 | continue | 57 | continue |
49 | if not mod.diff_parsed["added"]: | 58 | if not mod.diff_parsed["added"]: |
50 | continue | 59 | continue |
... | @@ -121,6 +130,9 @@ if __name__ == "__main__": | ... | @@ -121,6 +130,9 @@ if __name__ == "__main__": |
121 | help="directory that all repositories had been downloaded.",) | 130 | help="directory that all repositories had been downloaded.",) |
122 | parser.add_argument("--output_dir", type=str, required=True, | 131 | parser.add_argument("--output_dir", type=str, required=True, |
123 | help="The output directory where the preprocessed data will be written.") | 132 | help="The output directory where the preprocessed data will be written.") |
133 | + parser.add_argument("--lang", type=str, required=True, | ||
134 | + choices=['py', 'js', 'go', 'java', 'ruby', 'php'], | ||
135 | + help="The output directory where the preprocessed data will be written.") | ||
124 | parser.add_argument("--tokenizer_name", type=str, | 136 | parser.add_argument("--tokenizer_name", type=str, |
125 | default="microsoft/codebert-base", help="The name of tokenizer",) | 137 | default="microsoft/codebert-base", help="The name of tokenizer",) |
126 | parser.add_argument("--num_workers", default=4, type=int, help="number of process") | 138 | parser.add_argument("--num_workers", default=4, type=int, help="number of process") | ... | ... |
repositories/javascript.txt
0 → 100644
1 | +https://github.com/freeCodeCamp/freeCodeCamp | ||
2 | +https://github.com/vuejs/vue | ||
3 | +https://github.com/facebook/react | ||
4 | +https://github.com/twbs/bootstrap | ||
5 | +https://github.com/airbnb/javascript | ||
6 | +https://github.com/d3/d3 | ||
7 | +https://github.com/facebook/react-native | ||
8 | +https://github.com/trekhleb/javascript-algorithms | ||
9 | +https://github.com/facebook/create-react-app | ||
10 | +https://github.com/axios/axios | ||
11 | +https://github.com/nodejs/node | ||
12 | +https://github.com/mrdoob/three.js | ||
13 | +https://github.com/mui-org/material-ui | ||
14 | +https://github.com/angular/angular.js | ||
15 | +https://github.com/vercel/next.js | ||
16 | +https://github.com/webpack/webpack | ||
17 | +https://github.com/jquery/jquery | ||
18 | +https://github.com/hakimel/reveal.js | ||
19 | +https://github.com/atom/atom | ||
20 | +https://github.com/socketio/socket.io | ||
21 | +https://github.com/chartjs/Chart.js | ||
22 | +https://github.com/expressjs/express | ||
23 | +https://github.com/typicode/json-server | ||
24 | +https://github.com/adam-p/markdown-here | ||
25 | +https://github.com/Semantic-Org/Semantic-UI | ||
26 | +https://github.com/h5bp/html5-boilerplate | ||
27 | +https://github.com/gatsbyjs/gatsby | ||
28 | +https://github.com/lodash/lodash | ||
29 | +https://github.com/yangshun/tech-interview-handbook | ||
30 | +https://github.com/moment/moment | ||
31 | +https://github.com/apache/incubator-echarts | ||
32 | +https://github.com/meteor/meteor | ||
33 | +https://github.com/ReactTraining/react-router | ||
34 | +https://github.com/yarnpkg/yarn | ||
35 | +https://github.com/sveltejs/svelte | ||
36 | +https://github.com/Dogfalo/materialize | ||
37 | +https://github.com/prettier/prettier | ||
38 | +https://github.com/serverless/serverless | ||
39 | +https://github.com/babel/babel | ||
40 | +https://github.com/nwjs/nw.js | ||
41 | +https://github.com/juliangarnier/anime | ||
42 | +https://github.com/parcel-bundler/parcel | ||
43 | +https://github.com/ColorlibHQ/AdminLTE | ||
44 | +https://github.com/impress/impress.js | ||
45 | +https://github.com/TryGhost/Ghost | ||
46 | +https://github.com/Unitech/pm2 | ||
47 | +https://github.com/mozilla/pdf.js | ||
48 | +https://github.com/mermaid-js/mermaid | ||
49 | +https://github.com/algorithm-visualizer/algorithm-visualizer | ||
50 | +https://github.com/adobe/brackets | ||
51 | +https://github.com/gulpjs/gulp | ||
52 | +https://github.com/hexojs/hexo | ||
53 | +https://github.com/styled-components/styled-components | ||
54 | +https://github.com/nuxt/nuxt.js | ||
55 | +https://github.com/sahat/hackathon-starter | ||
56 | +https://github.com/alvarotrigo/fullPage.js | ||
57 | +https://github.com/strapi/strapi | ||
58 | +https://github.com/immutable-js/immutable-js | ||
59 | +https://github.com/koajs/koa | ||
60 | +https://github.com/videojs/video.js | ||
61 | +https://github.com/zenorocha/clipboard.js | ||
62 | +https://github.com/Leaflet/Leaflet | ||
63 | +https://github.com/RocketChat/Rocket.Chat | ||
64 | +https://github.com/photonstorm/phaser | ||
65 | +https://github.com/quilljs/quill | ||
66 | +https://github.com/jashkenas/backbone | ||
67 | +https://github.com/preactjs/preact | ||
68 | +https://github.com/tastejs/todomvc | ||
69 | +https://github.com/caolan/async | ||
70 | +https://github.com/vuejs/vue-cli | ||
71 | +https://github.com/react-boilerplate/react-boilerplate | ||
72 | +https://github.com/aosabook/500lines | ||
73 | +https://github.com/carbon-app/carbon | ||
74 | +https://github.com/Marak/faker.js | ||
75 | +https://github.com/jashkenas/underscore | ||
76 | +https://github.com/lerna/lerna | ||
77 | +https://github.com/nolimits4web/swiper | ||
78 | +https://github.com/vuejs/vuex | ||
79 | +https://github.com/request/request | ||
80 | +https://github.com/select2/select2 | ||
81 | +https://github.com/Modernizr/Modernizr | ||
82 | +https://github.com/facebook/draft-js | ||
83 | +https://github.com/rollup/rollup | ||
84 | +https://github.com/jlmakes/scrollreveal | ||
85 | +https://github.com/tj/commander.js | ||
86 | +https://github.com/chenglou/react-motion | ||
87 | +https://github.com/swagger-api/swagger-ui | ||
88 | +https://github.com/bilibili/flv.js | ||
89 | +https://github.com/segmentio/nightmare | ||
90 | +https://github.com/laurent22/joplin | ||
91 | +https://github.com/react-bootstrap/react-bootstrap | ||
92 | +https://github.com/sampotts/plyr | ||
93 | +https://github.com/avajs/ava | ||
94 | +https://github.com/immerjs/immer | ||
95 | +https://github.com/jorgebucaran/hyperapp | ||
96 | +https://github.com/jaredhanson/passport | ||
97 | +https://github.com/lovell/sharp | ||
98 | +https://github.com/localForage/localForage | ||
99 | +https://github.com/Popmotion/popmotion | ||
100 | +https://github.com/vuejs/vuepress | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
File moved
tests/javascript/added.diff
0 → 100644
1 | +diff --git a/function.js b/function.js | ||
2 | +new file mode 100644 | ||
3 | +index 0000000..ba89d9a | ||
4 | +--- /dev/null | ||
5 | ++++ b/function.js | ||
6 | +@@ -0,0 +1,6 @@ | ||
7 | ++function getIntoAnArgument() { | ||
8 | ++ var args = arguments.slice(); | ||
9 | ++ args.forEach(function(arg) { | ||
10 | ++ console.log(arg); | ||
11 | ++ }); | ||
12 | ++} | ||
13 | +\ No newline at end of file |
tests/javascript/fixed.diff
0 → 100644
1 | +diff --git a/function.js b/function.js | ||
2 | +index ba89d9a..d440734 100644 | ||
3 | +--- a/function.js | ||
4 | ++++ b/function.js | ||
5 | +@@ -1,6 +1,3 @@ | ||
6 | +-function getIntoAnArgument() { | ||
7 | +- var args = arguments.slice(); | ||
8 | +- args.forEach(function(arg) { | ||
9 | +- console.log(arg); | ||
10 | +- }); | ||
11 | ++function getIntoAnArgument(...args) { | ||
12 | ++ args.forEach(arg => console.log(arg)); | ||
13 | + } | ||
14 | +\ No newline at end of file |
File moved
File moved
... | @@ -65,10 +65,6 @@ class CitiesTestCase(unittest.TestCase): | ... | @@ -65,10 +65,6 @@ class CitiesTestCase(unittest.TestCase): |
65 | ) | 65 | ) |
66 | ) | 66 | ) |
67 | self.assertEqual(response.status_code, 200) | 67 | self.assertEqual(response.status_code, 200) |
68 | - self.assertEqual( | ||
69 | - json.loads(response.text), | ||
70 | - {'idx': 0, 'message': ['Test method .']} | ||
71 | - ) | ||
72 | 68 | ||
73 | def test_added(self): | 69 | def test_added(self): |
74 | response = requests.post( | 70 | response = requests.post( |
... | @@ -83,10 +79,6 @@ class CitiesTestCase(unittest.TestCase): | ... | @@ -83,10 +79,6 @@ class CitiesTestCase(unittest.TestCase): |
83 | ) | 79 | ) |
84 | ) | 80 | ) |
85 | self.assertEqual(response.status_code, 200) | 81 | self.assertEqual(response.status_code, 200) |
86 | - self.assertEqual( | ||
87 | - json.loads(response.text), | ||
88 | - {'idx': 0, 'message': ['Fix typo']} | ||
89 | - ) | ||
90 | 82 | ||
91 | 83 | ||
92 | def suite(): | 84 | def suite(): | ... | ... |
-
Please register or login to post a comment