Showing 2 changed files with 115 additions and 0 deletions
code/PreprocessingTrainingData.ipynb
0 → 100644
This diff could not be displayed because it is too large.
code/TrainingByBi-Sent2Vec.ipynb
0 → 100644
1 | +{ | ||
2 | + "nbformat": 4, | ||
3 | + "nbformat_minor": 0, | ||
4 | + "metadata": { | ||
5 | + "colab": { | ||
6 | + "name": "Untitled0.ipynb", | ||
7 | + "provenance": [] | ||
8 | + }, | ||
9 | + "kernelspec": { | ||
10 | + "name": "python3", | ||
11 | + "display_name": "Python 3" | ||
12 | + }, | ||
13 | + "language_info": { | ||
14 | + "name": "python" | ||
15 | + } | ||
16 | + }, | ||
17 | + "cells": [ | ||
18 | + { | ||
19 | + "cell_type": "code", | ||
20 | + "metadata": { | ||
21 | + "colab": { | ||
22 | + "base_uri": "https://localhost:8080/" | ||
23 | + }, | ||
24 | + "id": "gUZeRq7OjdB0", | ||
25 | + "outputId": "e4b56ee0-f158-431d-c2c4-6bbe40be0d4e" | ||
26 | + }, | ||
27 | + "source": [ | ||
28 | + "!git clone https://github.com/epfml/Bi-Sent2Vec.git" | ||
29 | + ], | ||
30 | + "execution_count": null, | ||
31 | + "outputs": [ | ||
32 | + { | ||
33 | + "output_type": "stream", | ||
34 | + "text": [ | ||
35 | + "Cloning into 'Bi-Sent2Vec'...\n", | ||
36 | + "remote: Enumerating objects: 55, done.\u001b[K\n", | ||
37 | + "remote: Counting objects: 100% (55/55), done.\u001b[K\n", | ||
38 | + "remote: Compressing objects: 100% (45/45), done.\u001b[K\n", | ||
39 | + "remote: Total 55 (delta 17), reused 35 (delta 7), pack-reused 0\u001b[K\n", | ||
40 | + "Unpacking objects: 100% (55/55), done.\n" | ||
41 | + ], | ||
42 | + "name": "stdout" | ||
43 | + } | ||
44 | + ] | ||
45 | + }, | ||
46 | + { | ||
47 | + "cell_type": "code", | ||
48 | + "metadata": { | ||
49 | + "colab": { | ||
50 | + "base_uri": "https://localhost:8080/" | ||
51 | + }, | ||
52 | + "id": "X-AKTcejjmGE", | ||
53 | + "outputId": "01e68019-ab89-4645-e60d-6d933f025a8e" | ||
54 | + }, | ||
55 | + "source": [ | ||
56 | + "!cd Bi-Sent2Vec/ && make" | ||
57 | + ], | ||
58 | + "execution_count": null, | ||
59 | + "outputs": [ | ||
60 | + { | ||
61 | + "output_type": "stream", | ||
62 | + "text": [ | ||
63 | + "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/args.cc\n", | ||
64 | + "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/dictionary.cc\n", | ||
65 | + "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/productquantizer.cc\n", | ||
66 | + "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/matrix.cc\n", | ||
67 | + "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/shmem_matrix.cc\n", | ||
68 | + "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/qmatrix.cc\n", | ||
69 | + "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/vector.cc\n", | ||
70 | + "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/model.cc\n", | ||
71 | + "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/utils.cc\n", | ||
72 | + "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/fasttext.cc\n", | ||
73 | + "c++ -pthread -std=c++0x -O3 -funroll-loops args.o dictionary.o productquantizer.o matrix.o shmem_matrix.o qmatrix.o vector.o model.o utils.o fasttext.o src/main.cc -o fasttext -lrt\n" | ||
74 | + ], | ||
75 | + "name": "stdout" | ||
76 | + } | ||
77 | + ] | ||
78 | + }, | ||
79 | + { | ||
80 | + "cell_type": "code", | ||
81 | + "metadata": { | ||
82 | + "colab": { | ||
83 | + "base_uri": "https://localhost:8080/" | ||
84 | + }, | ||
85 | + "id": "Jb8Fy8vekIVL", | ||
86 | + "outputId": "bfc5acc5-fe30-4f1e-dfaf-1285fd59afea" | ||
87 | + }, | ||
88 | + "source": [ | ||
89 | + "!cd Bi-Sent2Vec/ && ./fasttext bisent2vec -input ../drive/MyDrive/TrainingdataWithPOSOnlyKo.txt -output ../drive/MyDrive/lastModel -dim 300 -lr 0.2 -neg 10 -bucket 2000000 -maxVocabSize 750000 -thread 30 -t 0.000005 -epoch 5 -minCount 8 -dropoutK 4 -loss ns -wordNgrams 2 -numCheckPoints 5" | ||
90 | + ], | ||
91 | + "execution_count": null, | ||
92 | + "outputs": [ | ||
93 | + { | ||
94 | + "output_type": "stream", | ||
95 | + "text": [ | ||
96 | + "Read 95M words\n", | ||
97 | + "Number of words: 118144\n", | ||
98 | + "Number of labels: 0\n", | ||
99 | + "tcmalloc: large alloc 2541780992 bytes == 0x55d87b74a000 @ 0x7f822311c887 0x55d86f321a9c 0x55d86f33e278 0x55d86f33c0ba 0x55d86f33fec5 0x55d86f308632 0x7f8221fb1bf7 0x55d86f3088fa\n", | ||
100 | + "Progress: 20.0% words/sec/thread: 72927 lr: 0.159999 loss: 1.837360 eta: 0h2m \n", | ||
101 | + "Saving Model ----- Checkpoint 1\n", | ||
102 | + "Progress: 40.0% words/sec/thread: 71615 lr: 0.119999 loss: 1.784353 eta: 0h2m \n", | ||
103 | + "Saving Model ----- Checkpoint 2\n", | ||
104 | + "Progress: 60.0% words/sec/thread: 70740 lr: 0.080000 loss: 1.709536 eta: 0h1m \n", | ||
105 | + "Saving Model ----- Checkpoint 3\n", | ||
106 | + "Progress: 80.0% words/sec/thread: 70405 lr: 0.039999 loss: 1.627526 eta: 0h0m \n", | ||
107 | + "Saving Model ----- Checkpoint 4\n", | ||
108 | + "Progress: 100.0% words/sec/thread: 70414 lr: 0.000000 loss: 1.567680 eta: 0h0m \n" | ||
109 | + ], | ||
110 | + "name": "stdout" | ||
111 | + } | ||
112 | + ] | ||
113 | + } | ||
114 | + ] | ||
115 | +} | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
-
Please register or login to post a comment