Showing
5 changed files
with
0 additions
and
115 deletions
code/PreprocessingTrainingData.ipynb
deleted
100644 → 0
This diff could not be displayed because it is too large.
code/TrainingByBi-Sent2Vec.PNG
0 → 100644
50.5 KB
code/TrainingByBi-Sent2Vec.ipynb
deleted
100644 → 0
1 | -{ | ||
2 | - "nbformat": 4, | ||
3 | - "nbformat_minor": 0, | ||
4 | - "metadata": { | ||
5 | - "colab": { | ||
6 | - "name": "Untitled0.ipynb", | ||
7 | - "provenance": [] | ||
8 | - }, | ||
9 | - "kernelspec": { | ||
10 | - "name": "python3", | ||
11 | - "display_name": "Python 3" | ||
12 | - }, | ||
13 | - "language_info": { | ||
14 | - "name": "python" | ||
15 | - } | ||
16 | - }, | ||
17 | - "cells": [ | ||
18 | - { | ||
19 | - "cell_type": "code", | ||
20 | - "metadata": { | ||
21 | - "colab": { | ||
22 | - "base_uri": "https://localhost:8080/" | ||
23 | - }, | ||
24 | - "id": "gUZeRq7OjdB0", | ||
25 | - "outputId": "e4b56ee0-f158-431d-c2c4-6bbe40be0d4e" | ||
26 | - }, | ||
27 | - "source": [ | ||
28 | - "!git clone https://github.com/epfml/Bi-Sent2Vec.git" | ||
29 | - ], | ||
30 | - "execution_count": null, | ||
31 | - "outputs": [ | ||
32 | - { | ||
33 | - "output_type": "stream", | ||
34 | - "text": [ | ||
35 | - "Cloning into 'Bi-Sent2Vec'...\n", | ||
36 | - "remote: Enumerating objects: 55, done.\u001b[K\n", | ||
37 | - "remote: Counting objects: 100% (55/55), done.\u001b[K\n", | ||
38 | - "remote: Compressing objects: 100% (45/45), done.\u001b[K\n", | ||
39 | - "remote: Total 55 (delta 17), reused 35 (delta 7), pack-reused 0\u001b[K\n", | ||
40 | - "Unpacking objects: 100% (55/55), done.\n" | ||
41 | - ], | ||
42 | - "name": "stdout" | ||
43 | - } | ||
44 | - ] | ||
45 | - }, | ||
46 | - { | ||
47 | - "cell_type": "code", | ||
48 | - "metadata": { | ||
49 | - "colab": { | ||
50 | - "base_uri": "https://localhost:8080/" | ||
51 | - }, | ||
52 | - "id": "X-AKTcejjmGE", | ||
53 | - "outputId": "01e68019-ab89-4645-e60d-6d933f025a8e" | ||
54 | - }, | ||
55 | - "source": [ | ||
56 | - "!cd Bi-Sent2Vec/ && make" | ||
57 | - ], | ||
58 | - "execution_count": null, | ||
59 | - "outputs": [ | ||
60 | - { | ||
61 | - "output_type": "stream", | ||
62 | - "text": [ | ||
63 | - "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/args.cc\n", | ||
64 | - "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/dictionary.cc\n", | ||
65 | - "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/productquantizer.cc\n", | ||
66 | - "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/matrix.cc\n", | ||
67 | - "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/shmem_matrix.cc\n", | ||
68 | - "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/qmatrix.cc\n", | ||
69 | - "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/vector.cc\n", | ||
70 | - "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/model.cc\n", | ||
71 | - "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/utils.cc\n", | ||
72 | - "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/fasttext.cc\n", | ||
73 | - "c++ -pthread -std=c++0x -O3 -funroll-loops args.o dictionary.o productquantizer.o matrix.o shmem_matrix.o qmatrix.o vector.o model.o utils.o fasttext.o src/main.cc -o fasttext -lrt\n" | ||
74 | - ], | ||
75 | - "name": "stdout" | ||
76 | - } | ||
77 | - ] | ||
78 | - }, | ||
79 | - { | ||
80 | - "cell_type": "code", | ||
81 | - "metadata": { | ||
82 | - "colab": { | ||
83 | - "base_uri": "https://localhost:8080/" | ||
84 | - }, | ||
85 | - "id": "Jb8Fy8vekIVL", | ||
86 | - "outputId": "bfc5acc5-fe30-4f1e-dfaf-1285fd59afea" | ||
87 | - }, | ||
88 | - "source": [ | ||
89 | - "!cd Bi-Sent2Vec/ && ./fasttext bisent2vec -input ../drive/MyDrive/TrainingdataWithPOSOnlyKo.txt -output ../drive/MyDrive/lastModel -dim 300 -lr 0.2 -neg 10 -bucket 2000000 -maxVocabSize 750000 -thread 30 -t 0.000005 -epoch 5 -minCount 8 -dropoutK 4 -loss ns -wordNgrams 2 -numCheckPoints 5" | ||
90 | - ], | ||
91 | - "execution_count": null, | ||
92 | - "outputs": [ | ||
93 | - { | ||
94 | - "output_type": "stream", | ||
95 | - "text": [ | ||
96 | - "Read 95M words\n", | ||
97 | - "Number of words: 118144\n", | ||
98 | - "Number of labels: 0\n", | ||
99 | - "tcmalloc: large alloc 2541780992 bytes == 0x55d87b74a000 @ 0x7f822311c887 0x55d86f321a9c 0x55d86f33e278 0x55d86f33c0ba 0x55d86f33fec5 0x55d86f308632 0x7f8221fb1bf7 0x55d86f3088fa\n", | ||
100 | - "Progress: 20.0% words/sec/thread: 72927 lr: 0.159999 loss: 1.837360 eta: 0h2m \n", | ||
101 | - "Saving Model ----- Checkpoint 1\n", | ||
102 | - "Progress: 40.0% words/sec/thread: 71615 lr: 0.119999 loss: 1.784353 eta: 0h2m \n", | ||
103 | - "Saving Model ----- Checkpoint 2\n", | ||
104 | - "Progress: 60.0% words/sec/thread: 70740 lr: 0.080000 loss: 1.709536 eta: 0h1m \n", | ||
105 | - "Saving Model ----- Checkpoint 3\n", | ||
106 | - "Progress: 80.0% words/sec/thread: 70405 lr: 0.039999 loss: 1.627526 eta: 0h0m \n", | ||
107 | - "Saving Model ----- Checkpoint 4\n", | ||
108 | - "Progress: 100.0% words/sec/thread: 70414 lr: 0.000000 loss: 1.567680 eta: 0h0m \n" | ||
109 | - ], | ||
110 | - "name": "stdout" | ||
111 | - } | ||
112 | - ] | ||
113 | - } | ||
114 | - ] | ||
115 | -} | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
code/morphemeAnalyzerByKhaiii&nltk.PNG
0 → 100644
14.7 KB
code/readAI-hubData&Preprocessing.PNG
0 → 100644
27 KB
-
Please register or login to post a comment