TrainingByBi-Sent2Vec.ipynb 4.29 KB
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "TrainingByBi-Sent2Vec.ipynb",
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "gUZeRq7OjdB0",
        "outputId": "e4b56ee0-f158-431d-c2c4-6bbe40be0d4e"
      },
      "source": [
        "# Clone Bi-Sent2Vec only when the checkout is missing, so re-running this cell\n",
        "# (e.g. after a kernel restart on the same runtime) is a no-op instead of a git error.\n",
        "![ -d Bi-Sent2Vec/.git ] || git clone https://github.com/epfml/Bi-Sent2Vec.git"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Cloning into 'Bi-Sent2Vec'...\n",
            "remote: Enumerating objects: 55, done.\u001b[K\n",
            "remote: Counting objects: 100% (55/55), done.\u001b[K\n",
            "remote: Compressing objects: 100% (45/45), done.\u001b[K\n",
            "remote: Total 55 (delta 17), reused 35 (delta 7), pack-reused 0\u001b[K\n",
            "Unpacking objects: 100% (55/55), done.\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "X-AKTcejjmGE",
        "outputId": "01e68019-ab89-4645-e60d-6d933f025a8e"
      },
      "source": [
        "# Build the fasttext binary inside the cloned Bi-Sent2Vec checkout.\n",
        "!make --no-print-directory -C Bi-Sent2Vec"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/args.cc\n",
            "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/dictionary.cc\n",
            "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/productquantizer.cc\n",
            "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/matrix.cc\n",
            "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/shmem_matrix.cc\n",
            "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/qmatrix.cc\n",
            "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/vector.cc\n",
            "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/model.cc\n",
            "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/utils.cc\n",
            "c++ -pthread -std=c++0x -O3 -funroll-loops -c src/fasttext.cc\n",
            "c++ -pthread -std=c++0x -O3 -funroll-loops args.o dictionary.o productquantizer.o matrix.o shmem_matrix.o qmatrix.o vector.o model.o utils.o fasttext.o src/main.cc -o fasttext -lrt\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Jb8Fy8vekIVL",
        "outputId": "bfc5acc5-fe30-4f1e-dfaf-1285fd59afea"
      },
      "source": [
        "# The training corpus and the model output both live under drive/MyDrive.\n",
        "# Mount Google Drive here when it is not mounted yet, so the notebook also runs on a\n",
        "# fresh Colab runtime (Restart & Run All) instead of relying on a manual mount.\n",
        "import os\n",
        "\n",
        "if not os.path.isdir('drive/MyDrive'):\n",
        "    from google.colab import drive\n",
        "    drive.mount(os.path.abspath('drive'))\n",
        "\n",
        "# Train Bi-Sent2Vec on the Drive corpus; the model is written under the 'lastModel' prefix.\n",
        "!cd Bi-Sent2Vec/ && ./fasttext bisent2vec -input ../drive/MyDrive/TrainingdataWithPOSOnlyKo.txt -output ../drive/MyDrive/lastModel -dim 300 -lr 0.2 -neg 10 -bucket 2000000 -maxVocabSize 750000 -thread 30 -t 0.000005 -epoch 5 -minCount 8 -dropoutK 4 -loss ns -wordNgrams 2 -numCheckPoints 5"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Read 95M words\n",
            "Number of words:  118144\n",
            "Number of labels: 0\n",
            "tcmalloc: large alloc 2541780992 bytes == 0x55d87b74a000 @  0x7f822311c887 0x55d86f321a9c 0x55d86f33e278 0x55d86f33c0ba 0x55d86f33fec5 0x55d86f308632 0x7f8221fb1bf7 0x55d86f3088fa\n",
            "Progress: 20.0%  words/sec/thread: 72927  lr: 0.159999  loss: 1.837360  eta: 0h2m \n",
            "Saving Model ----- Checkpoint 1\n",
            "Progress: 40.0%  words/sec/thread: 71615  lr: 0.119999  loss: 1.784353  eta: 0h2m \n",
            "Saving Model ----- Checkpoint 2\n",
            "Progress: 60.0%  words/sec/thread: 70740  lr: 0.080000  loss: 1.709536  eta: 0h1m \n",
            "Saving Model ----- Checkpoint 3\n",
            "Progress: 80.0%  words/sec/thread: 70405  lr: 0.039999  loss: 1.627526  eta: 0h0m \n",
            "Saving Model ----- Checkpoint 4\n",
            "Progress: 100.0%  words/sec/thread: 70414  lr: 0.000000  loss: 1.567680  eta: 0h0m \n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
}