kykint

Switch to Google Cloud Vision for OCR

Also separate text recognition code into a function
...@@ -17,8 +17,9 @@ const languagedetect_api_url = 'https://openapi.naver.com/v1/papago/detectLangs' ...@@ -17,8 +17,9 @@ const languagedetect_api_url = 'https://openapi.naver.com/v1/papago/detectLangs'
17 const db = require('./db'); 17 const db = require('./db');
18 db.init(); 18 db.init();
19 19
20 -// Tesseract module for image recognition 20 +// Import Google Cloud client library and create a client
21 -const tesseract = require('node-tesseract-ocr'); 21 +const vision = require('@google-cloud/vision');
22 +const visionClient = new vision.ImageAnnotatorClient(config.gcloud);
22 23
23 // fs module for saving/removing image file upon/after recognition 24 // fs module for saving/removing image file upon/after recognition
24 const fs = require('fs'); 25 const fs = require('fs');
...@@ -136,6 +137,50 @@ function translate(user, message) { ...@@ -136,6 +137,50 @@ function translate(user, message) {
136 }); 137 });
137 } 138 }
138 139
140 +/**
141 + * Detect and read text from an image file using Google Cloud Vision API.
142 + *
143 + * @param {*} fileId Telegram-provided file id of image to read text from
144 + */
145 +function detectText(fileId) {
146 + return new Promise(function (resolve, reject) {
147 + // Download the image, which will later be deleted to avoid git detection
148 + bot.downloadFile(fileId, '.').then(function (filePath) {
149 + console.log('Image downloaded: ', filePath);
150 +
151 + visionClient.documentTextDetection(filePath)
152 + .then(function (result) {
153 + const fullTextAnnotation = result[0].fullTextAnnotation;
154 + const text = fullTextAnnotation.text;
155 +
156 + console.log('Text detection result: ', text);
157 +
158 + resolve(text);
159 + // Delete the image
160 + fs.unlink(filePath, function (error) {
161 + if (error) {
162 + console.log('Error deleting image: ', error);
163 + } else {
164 + console.log('Successfully deleted file ', filePath);
165 + }
166 + });
167 + }).catch(function (error) {
168 + console.log(error);
169 +
170 + reject(error);
171 + // Delete the image
172 + fs.unlink(filePath, function (error) {
173 + if (error) {
174 + console.log('Error deleting image: ', error);
175 + } else {
176 + console.log('Successfully deleted file ', filePath);
177 + }
178 + });
179 + });
180 + });
181 + });
182 +}
183 +
139 // [Any normal message which is not a command (not starting with '/')] 184 // [Any normal message which is not a command (not starting with '/')]
140 bot.onText(/^(?!\/)((.|\n)+)/, (msg, match) => { 185 bot.onText(/^(?!\/)((.|\n)+)/, (msg, match) => {
141 const user = msg.from; 186 const user = msg.from;
...@@ -199,43 +244,16 @@ bot.on('photo', (msg) => { ...@@ -199,43 +244,16 @@ bot.on('photo', (msg) => {
199 const photo = msg.photo[msg.photo.length - 1]; // Choose largest image possible 244 const photo = msg.photo[msg.photo.length - 1]; // Choose largest image possible
200 const photoId = photo.file_id; 245 const photoId = photo.file_id;
201 246
202 - // Download the image, which will later be deleted to avoid git detection 247 + // Detect text from given image
203 - bot.downloadFile(photoId, '.').then(function (filePath) { 248 + detectText(photoId).then(function (text) {
204 - console.log('Image downloaded: ', filePath); 249 + // Translate the result
205 - 250 + translate(user, text).then(function (result) {
206 - // Begin tesseract OCR 251 + // Send recognized text to user
207 - tesseract.recognize(filePath).then(function (text) { 252 + bot.sendMessage(chatId, result);
208 - console.log('OCR result: ', text);
209 -
210 - // Translate the result
211 - translate(user, text).then(function (result) {
212 - // Send recognized text to user
213 - bot.sendMessage(chatId, result);
214 - // Delete the image
215 - fs.unlink(filePath, function (error) {
216 - if (error) {
217 - console.log('Error deleting image: ', error);
218 - } else {
219 - console.log('Successfully deleted file ', filePath);
220 - }
221 - });
222 - });
223 - }).catch(function (error) {
224 - // OCR failed
225 - console.log('OCR error: ', error);
226 -
227 - // Delete the image
228 - fs.unlink(filePath, function (error) {
229 - if (error) {
230 - console.log('Error deleting image: ', error);
231 - } else {
232 - console.log('Successfully deleted file ', filePath);
233 - }
234 - });
235 }); 253 });
236 }).catch(function (error) { 254 }).catch(function (error) {
237 - // Image download failed 255 + // Text detection failed
238 - console.log('Image download error: ', error); 256 + console.log('Text detection error: ', error);
239 }); 257 });
240 }); 258 });
241 259
......
...@@ -10,5 +10,9 @@ module.exports = { ...@@ -10,5 +10,9 @@ module.exports = {
10 // https://developers.naver.com/apps/#/register 10 // https://developers.naver.com/apps/#/register
11 client_id: 'XXXXXXXXXXXXXXXXXXXX', 11 client_id: 'XXXXXXXXXXXXXXXXXXXX',
12 client_secret: 'XXXXXXXXXX' 12 client_secret: 'XXXXXXXXXX'
13 + },
14 + gcloud: {
15 + projectId: 'your_project_name',
16 + keyFilename: '/path/to/your/keyfile.json'
13 } 17 }
14 } 18 }
......
This diff is collapsed. Click to expand it.
...@@ -10,9 +10,9 @@ ...@@ -10,9 +10,9 @@
10 "author": "강수인", 10 "author": "강수인",
11 "license": "MIT", 11 "license": "MIT",
12 "dependencies": { 12 "dependencies": {
13 + "@google-cloud/vision": "^1.0.0",
13 "mongodb": "^3.2.6", 14 "mongodb": "^3.2.6",
14 "node-telegram-bot-api": "^0.30.0", 15 "node-telegram-bot-api": "^0.30.0",
15 - "node-tesseract-ocr": "^0.1.0",
16 "request": "^2.88.0" 16 "request": "^2.88.0"
17 } 17 }
18 } 18 }
......