students_num-checkpoint.ipynb 19.3 KB
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 21398/21398 [00:16<00:00, 1308.22it/s]\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import pyspark\n",
    "from tqdm import tqdm\n",
    "\n",
    "basic_folder = ''\n",
    "file_name2 = basic_folder + '2019_students_num.csv'\n",
    "df2 = pd.read_csv(file_name2)\n",
    "\n",
    "count_row = df2.shape[0]  # number of rows\n",
    "elementary = []\n",
    "middle = []\n",
    "high = []\n",
    "\n",
    "# Source columns copied for each school level: the school name followed by\n",
    "# the per-grade student-count columns (three for middle/high, six for elementary).\n",
    "secondary_source_cols = ['학교명', '1학년_학생수_계', '2학년_학생수_계', '3학년_학생수_계']\n",
    "elementary_source_cols = secondary_source_cols + ['4학년_학생수_계', '5학년_학생수_계', '6학년_학생수_계']\n",
    "\n",
    "def type_of_school():\n",
    "    \"\"\"Split df2 by its '학교급' column into the high/middle/elementary lists.\n",
    "\n",
    "    Appends one row (school name followed by the student-count columns) to the\n",
    "    module-level list that matches each school's level. Rows whose level is\n",
    "    none of the three handled values are skipped. Returns None.\n",
    "    \"\"\"\n",
    "    school_level = df2['학교급']\n",
    "    # Boolean masks pick every row of a level in one vectorized step, avoiding\n",
    "    # chained df2.loc[x]['col'] lookups that build a row Series per field read.\n",
    "    for level, source_cols, target in (\n",
    "        ('고등학교', secondary_source_cols, high),\n",
    "        ('중학교', secondary_source_cols, middle),\n",
    "        ('초등학교', elementary_source_cols, elementary),\n",
    "    ):\n",
    "        target.extend(df2.loc[school_level == level, source_cols].values.tolist())\n",
    "\n",
    "type_of_school()\n",
    "\n",
    "# to_csv keeps its default index=True, so each file gets a leading index column.\n",
    "columns = ['school_name', '1_stu_num', '2_stu_num', '3_stu_num']\n",
    "high_df = pd.DataFrame(high, columns=columns)\n",
    "high_df.to_csv(r'high_school_stu_num.csv')\n",
    "\n",
    "middle_df = pd.DataFrame(middle, columns=columns)\n",
    "middle_df.to_csv(r'middle_school_stu_num.csv')\n",
    "\n",
    "columns1 = ['school_name', '1_stu_num', '2_stu_num', '3_stu_num', '4_stu_num', '5_stu_num', '6_stu_num']\n",
    "ele_df = pd.DataFrame(elementary, columns=columns1)\n",
    "ele_df.to_csv(r'elementary_school_stu_num.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 0/11873 [00:00<?, ?it/s]\n"
     ]
    },
    {
     "ename": "KeyError",
     "evalue": "'도로명주소'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m   4409\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4410\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mlibindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value_at\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   4411\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.get_value_at\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.get_value_at\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/util.pxd\u001b[0m in \u001b[0;36mpandas._libs.util.get_value_at\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/util.pxd\u001b[0m in \u001b[0;36mpandas._libs.util.validate_indexer\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;31mTypeError\u001b[0m: 'str' object cannot be interpreted as an integer",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-9-924f5168e0e8>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     23\u001b[0m             \u001b[0melementary\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     24\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 25\u001b[0;31m \u001b[0mtype_of_school\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     26\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'school_name'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'school_addr'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'latitude'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'longitude'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     27\u001b[0m \u001b[0mmiddle_df\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmiddle\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-9-924f5168e0e8>\u001b[0m in \u001b[0;36mtype_of_school\u001b[0;34m()\u001b[0m\n\u001b[1;32m     20\u001b[0m             \u001b[0mmiddle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     21\u001b[0m         \u001b[0;32melif\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m==\u001b[0m\u001b[0;34m'초등학교'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m             \u001b[0mrow\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'학교명'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'도로명주소'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'위도'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'경도'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     23\u001b[0m             \u001b[0melementary\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     24\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m    869\u001b[0m         \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    870\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 871\u001b[0;31m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    872\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    873\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m   4416\u001b[0m                     \u001b[0;32mraise\u001b[0m \u001b[0mInvalidIndexError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4417\u001b[0m                 \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4418\u001b[0;31m                     \u001b[0;32mraise\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   4419\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4420\u001b[0m                 \u001b[0;32mraise\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m   4402\u001b[0m         \u001b[0mk\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_convert_scalar_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkind\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"getitem\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4403\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4404\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtz\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"tz\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   4405\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4406\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mholds_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_boolean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;31mKeyError\u001b[0m: '도로명주소'"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import pyspark\n",
    "from tqdm import tqdm\n",
    "\n",
    "basic_folder = ''\n",
    "file_name = basic_folder + 'elementary_middle_schools_location.csv'\n",
    "\n",
    "df2 = pd.read_csv(file_name)\n",
    "count_row = df2.shape[0]\n",
    "\n",
    "elementary = []\n",
    "middle = []\n",
    "\n",
    "# Source columns copied for every school, in the order of the output\n",
    "# columns school_name, school_addr, latitude, longitude.\n",
    "location_source_cols = ['학교명', '소재지도로명주소', '위도', '경도']\n",
    "\n",
    "def type_of_school():\n",
    "    \"\"\"Split df2 by its '학교급구분' column into the middle/elementary lists.\n",
    "\n",
    "    Appends one row (the four location_source_cols values) to the module-level\n",
    "    list that matches each school's level. Rows whose level is neither of the\n",
    "    two handled values are skipped. Returns None.\n",
    "    \"\"\"\n",
    "    school_level = df2['학교급구분']\n",
    "    # Boolean masks pick every row of a level in one vectorized step, avoiding\n",
    "    # chained df2.loc[x]['col'] lookups that build a row Series per field read.\n",
    "    for level, target in (('중학교', middle), ('초등학교', elementary)):\n",
    "        target.extend(df2.loc[school_level == level, location_source_cols].values.tolist())\n",
    "\n",
    "type_of_school()\n",
    "\n",
    "# to_csv keeps its default index=True, so each file gets a leading index column.\n",
    "columns = ['school_name', 'school_addr', 'latitude', 'longitude']\n",
    "middle_df = pd.DataFrame(middle, columns=columns)\n",
    "middle_df.to_csv(r'middle_school.csv')\n",
    "\n",
    "elem_df = pd.DataFrame(elementary, columns=columns)\n",
    "elem_df.to_csv(r'elementary_school.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2360/2360 [18:07<00:00,  2.17it/s]\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import pyspark\n",
    "from tqdm import tqdm\n",
    "\n",
    "basic_folder = ''\n",
    "file_name = basic_folder + 'middle_school.csv'\n",
    "file_name2 = basic_folder + 'middle_school_stu_num.csv'\n",
    "\n",
    "middle_df = pd.read_csv(file_name)\n",
    "middle_stu_num_df = pd.read_csv(file_name2)\n",
    "\n",
    "count_row = middle_df.shape[0]\n",
    "count_row2 = middle_stu_num_df.shape[0]\n",
    "\n",
    "middle_arr = []\n",
    "\n",
    "def find_middle_student_num():\n",
    "    \"\"\"Append one combined row to middle_arr for every school-name match.\n",
    "\n",
    "    Each row of middle_df is paired with every row of middle_stu_num_df that\n",
    "    has the same 'school_name'. A combined row holds the four location fields\n",
    "    followed by the three student-count fields. Returns None.\n",
    "    \"\"\"\n",
    "    # Rows without a name are dropped first: merge() pairs missing keys with\n",
    "    # each other, whereas an == comparison never matches NaN.\n",
    "    locations = middle_df[['school_name', 'school_addr', 'latitude', 'longitude']].dropna(subset=['school_name'])\n",
    "    counts = middle_stu_num_df[['school_name', '1_stu_num', '2_stu_num', '3_stu_num']].dropna(subset=['school_name'])\n",
    "    # An inner merge keeps the row order of the left frame and replaces a\n",
    "    # nested row-by-row scan over both tables with a single join.\n",
    "    matched = locations.merge(counts, on='school_name', how='inner')\n",
    "    middle_arr.extend(matched.values.tolist())\n",
    "\n",
    "find_middle_student_num()\n",
    "\n",
    "# to_csv keeps its default index=True, so the file gets a leading index column.\n",
    "columns = ['school_name', 'school_addr', 'latitude', 'longitude', '1_stu_num', '2_stu_num', '3_stu_num']\n",
    "final_middle_df = pd.DataFrame(middle_arr, columns=columns)\n",
    "final_middle_df.to_csv(r'final_middle_school.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "0it [00:00, ?it/s]\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import pyspark\n",
    "from tqdm import tqdm\n",
    "\n",
    "basic_folder = ''\n",
    "file_name = basic_folder + 'elementary_school.csv'\n",
    "file_name2 = basic_folder + 'elementary_school_stu_num.csv'\n",
    "\n",
    "ele_df = pd.read_csv(file_name)\n",
    "ele_stu_num_df = pd.read_csv(file_name2)\n",
    "\n",
    "count_row = ele_df.shape[0]\n",
    "count_row2 = ele_stu_num_df.shape[0]\n",
    "\n",
    "ele_arr = []\n",
    "\n",
    "def find_ele_student_num():\n",
    "    \"\"\"Append one combined row to ele_arr for every school-name match.\n",
    "\n",
    "    Each row of ele_df is paired with every row of ele_stu_num_df that has\n",
    "    the same 'school_name'. A combined row holds the four location fields\n",
    "    followed by the six student-count fields. Returns None.\n",
    "    \"\"\"\n",
    "    # Rows without a name are dropped first: merge() pairs missing keys with\n",
    "    # each other, whereas an == comparison never matches NaN.\n",
    "    locations = ele_df[['school_name', 'school_addr', 'latitude', 'longitude']].dropna(subset=['school_name'])\n",
    "    counts = ele_stu_num_df[\n",
    "        ['school_name', '1_stu_num', '2_stu_num', '3_stu_num', '4_stu_num', '5_stu_num', '6_stu_num']\n",
    "    ].dropna(subset=['school_name'])\n",
    "    # An inner merge keeps the row order of the left frame and replaces a\n",
    "    # nested row-by-row scan over both tables with a single join.\n",
    "    matched = locations.merge(counts, on='school_name', how='inner')\n",
    "    ele_arr.extend(matched.values.tolist())\n",
    "\n",
    "find_ele_student_num()\n",
    "\n",
    "# to_csv keeps its default index=True, so the file gets a leading index column.\n",
    "columns = ['school_name', 'school_addr', 'latitude', 'longitude', '1_stu_num', '2_stu_num', '3_stu_num', '4_stu_num', '5_stu_num', '6_stu_num']\n",
    "final_ele_df = pd.DataFrame(ele_arr, columns=columns)\n",
    "final_ele_df.to_csv(r'final_ele_school.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import pyspark\n",
    "from tqdm import tqdm\n",
    "\n",
    "basic_folder = ''\n",
    "# NOTE(review): no cell in this notebook writes high_school.csv - confirm it is produced elsewhere.\n",
    "file_name = basic_folder + 'high_school.csv'\n",
    "file_name2 = basic_folder + 'high_school_stu_num.csv'\n",
    "\n",
    "# Dedicated names so this cell does not rebind the middle-school frames.\n",
    "high_loc_df = pd.read_csv(file_name)\n",
    "high_stu_num_df = pd.read_csv(file_name2)\n",
    "\n",
    "count_row = high_loc_df.shape[0]\n",
    "count_row2 = high_stu_num_df.shape[0]\n",
    "\n",
    "high_arr = []\n",
    "\n",
    "def find_high_student_num():\n",
    "    \"\"\"Append one combined row to high_arr for every school-name match.\n",
    "\n",
    "    Each row of high_loc_df is paired with every row of high_stu_num_df that\n",
    "    has the same 'school_name'. A combined row holds the four location fields\n",
    "    followed by the three student-count fields. Returns None.\n",
    "    \"\"\"\n",
    "    # Rows without a name are dropped first: merge() pairs missing keys with\n",
    "    # each other, whereas an == comparison never matches NaN.\n",
    "    locations = high_loc_df[['school_name', 'school_addr', 'latitude', 'longitude']].dropna(subset=['school_name'])\n",
    "    counts = high_stu_num_df[['school_name', '1_stu_num', '2_stu_num', '3_stu_num']].dropna(subset=['school_name'])\n",
    "    # An inner merge keeps the row order of the left frame and replaces a\n",
    "    # nested row-by-row scan over both tables with a single join.\n",
    "    matched = locations.merge(counts, on='school_name', how='inner')\n",
    "    high_arr.extend(matched.values.tolist())\n",
    "\n",
    "find_high_student_num()\n",
    "\n",
    "# Build the output from high_arr, the list this cell fills.\n",
    "# to_csv keeps its default index=True, so the file gets a leading index column.\n",
    "columns = ['school_name', 'school_addr', 'latitude', 'longitude', '1_stu_num', '2_stu_num', '3_stu_num']\n",
    "final_high_df = pd.DataFrame(high_arr, columns=columns)\n",
    "final_high_df.to_csv(r'final_high_school.csv')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}