kykint

Implement OCR using tesseract

...@@ -17,6 +17,12 @@ const languagedetect_api_url = 'https://openapi.naver.com/v1/papago/detectLangs' ...@@ -17,6 +17,12 @@ const languagedetect_api_url = 'https://openapi.naver.com/v1/papago/detectLangs'
17 const db = require('./db'); 17 const db = require('./db');
18 db.init(); 18 db.init();
19 19
20 +// Tesseract module for image recognition
21 +const tesseract = require('node-tesseract-ocr');
22 +
23 +// fs module for saving/removing image file upon/after recognition
24 +const fs = require('fs');
25 +
20 // /echo [whatever] 26 // /echo [whatever]
21 bot.onText(/\/echo (.+)/, (msg, match) => { 27 bot.onText(/\/echo (.+)/, (msg, match) => {
22 // 'msg' is the received Message from Telegram 28 // 'msg' is the received Message from Telegram
...@@ -184,6 +190,55 @@ bot.onText(/^\/(t|translate)($| ((.|\n)+))/, (msg, match) => { ...@@ -184,6 +190,55 @@ bot.onText(/^\/(t|translate)($| ((.|\n)+))/, (msg, match) => {
184 } 190 }
185 }); 191 });
186 192
// When an image file is received, temporarily save it in the current
// directory and use tesseract to recognize its text. The recognized text is
// then translated and sent to the user. The temporary image is removed once
// processing finishes, whether it succeeded or failed.
bot.on('photo', (msg) => {
  const user = msg.from;
  const chatId = msg.chat.id;

  // A photo message carries a list of sizes whose length is not fixed.
  // Keep the original preference for the third entry, but fall back to the
  // last available one instead of crashing when fewer than three exist.
  const sizes = msg.photo || [];
  const photo = sizes[Math.min(2, sizes.length - 1)];
  if (!photo) {
    console.log('Photo message contained no image sizes');
    return;
  }
  const photoId = photo.file_id;

  // Delete the temporary image. Failures are only logged so that cleanup
  // can never mask the outcome of the OCR/translation steps.
  const removeImage = (filePath) => {
    fs.unlink(filePath, (error) => {
      if (error) {
        console.log('Error deleting image: ', error);
      } else {
        console.log('Successfully deleted file ', filePath);
      }
    });
  };

  // Download the image, which will later be deleted to avoid git detection
  bot.downloadFile(photoId, '.').then((filePath) => {
    console.log('Image downloaded: ', filePath);

    // Begin tesseract OCR
    return tesseract.recognize(filePath).then((text) => {
      console.log('OCR result: ', text);

      // Translate the result and send it to the user. The promise is
      // returned so that cleanup waits for it, and its failures are handled
      // here rather than surfacing as unhandled rejections.
      return translate(user, text)
        .then((result) => bot.sendMessage(chatId, result))
        .catch((error) => {
          console.log('Translation/send error: ', error);
        });
    }, (error) => {
      // OCR failed
      console.log('OCR error: ', error);
    }).then(() => {
      // Every path above resolves, so the image is always deleted exactly once.
      removeImage(filePath);
    });
  }).catch((error) => {
    // Image download failed
    console.log('Image download error: ', error);
  });
});
241 +
187 // /l(anguage) 242 // /l(anguage)
188 // Let user select the language he wants his message to translate to. 243 // Let user select the language he wants his message to translate to.
189 // When triggered, bot will send an inline keyboard message with a list 244 // When triggered, bot will send an inline keyboard message with a list
......
...@@ -417,6 +417,11 @@ ...@@ -417,6 +417,11 @@
417 } 417 }
418 } 418 }
419 }, 419 },
420 + "node-tesseract-ocr": {
421 + "version": "0.1.0",
422 + "resolved": "https://registry.npmjs.org/node-tesseract-ocr/-/node-tesseract-ocr-0.1.0.tgz",
423 + "integrity": "sha512-gUp+fy8Wiy2ZZQiSe33ApfyaBPOOlTz8nrHjZqWGAX4l06lW01uO6ABA5KWBhO3B1DmWl1Y9T1aLasfXYpm2Dg=="
424 + },
420 "oauth-sign": { 425 "oauth-sign": {
421 "version": "0.9.0", 426 "version": "0.9.0",
422 "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.9.0.tgz", 427 "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.9.0.tgz",
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
12 "dependencies": { 12 "dependencies": {
13 "mongodb": "^3.2.6", 13 "mongodb": "^3.2.6",
14 "node-telegram-bot-api": "^0.30.0", 14 "node-telegram-bot-api": "^0.30.0",
15 + "node-tesseract-ocr": "^0.1.0",
15 "request": "^2.88.0" 16 "request": "^2.88.0"
16 } 17 }
17 } 18 }
......