students_num-checkpoint.ipynb
19.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 21398/21398 [00:16<00:00, 1308.22it/s]\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import pyspark\n",
"from tqdm import tqdm\n",
"\n",
"basic_folder = ''\n",
"file_name2 = basic_folder + '2019_students_num.csv'\n",
"df2 =pd.read_csv(file_name2)\n",
"\n",
"count_row=df2.shape[0] #number of rows\n",
"elementary=[]\n",
"middle=[]\n",
"high=[]\n",
"\n",
"def type_of_school():\n",
" for x in tqdm(range(count_row)):\n",
" type=df2.loc[x]['학교급']\n",
" if(type=='고등학교'):\n",
" row=[df2.loc[x]['학교명'],df2.loc[x]['1학년_학생수_계'],df2.loc[x]['2학년_학생수_계'],df2.loc[x]['3학년_학생수_계']]\n",
" high.append(row)\n",
" elif(type=='중학교'):\n",
" row=[df2.loc[x]['학교명'],df2.loc[x]['1학년_학생수_계'],df2.loc[x]['2학년_학생수_계'],df2.loc[x]['3학년_학생수_계']]\n",
" middle.append(row)\n",
" elif(type=='초등학교'):\n",
" row=[df2.loc[x]['학교명'],df2.loc[x]['1학년_학생수_계'],df2.loc[x]['2학년_학생수_계'],df2.loc[x]['3학년_학생수_계'],df2.loc[x]['4학년_학생수_계'],df2.loc[x]['5학년_학생수_계'],df2.loc[x]['6학년_학생수_계']]\n",
" elementary.append(row)\n",
"\n",
"type_of_school()\n",
" \n",
"columns=['school_name','1_stu_num','2_stu_num','3_stu_num']\n",
"high_df=pd.DataFrame(high,columns=columns)\n",
"high_df.to_csv(r'high_school_stu_num.csv')\n",
"\n",
"middle_df=pd.DataFrame(middle,columns=columns)\n",
"middle_df.to_csv(r'middle_school_stu_num.csv')\n",
"\n",
"columns1=['school_name','1_stu_num','2_stu_num','3_stu_num','4_stu_num','5_stu_num','6_stu_num']\n",
"ele_df=pd.DataFrame(elementary,columns=columns1)\n",
"ele_df.to_csv(r'elementary_school_stu_num.csv')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/11873 [00:00<?, ?it/s]\n"
]
},
{
"ename": "KeyError",
"evalue": "'도로명주소'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m 4409\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4410\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mlibindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value_at\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4411\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.get_value_at\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.get_value_at\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/util.pxd\u001b[0m in \u001b[0;36mpandas._libs.util.get_value_at\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/util.pxd\u001b[0m in \u001b[0;36mpandas._libs.util.validate_indexer\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mTypeError\u001b[0m: 'str' object cannot be interpreted as an integer",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-9-924f5168e0e8>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0melementary\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 25\u001b[0;31m \u001b[0mtype_of_school\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 26\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'school_name'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'school_addr'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'latitude'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'longitude'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0mmiddle_df\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmiddle\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-9-924f5168e0e8>\u001b[0m in \u001b[0;36mtype_of_school\u001b[0;34m()\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0mmiddle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;32melif\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m==\u001b[0m\u001b[0;34m'초등학교'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mrow\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'학교명'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'도로명주소'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'위도'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'경도'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0melementary\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 869\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 870\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 871\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 872\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 873\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m 4416\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mInvalidIndexError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4417\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4418\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4419\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4420\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m 4402\u001b[0m \u001b[0mk\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_convert_scalar_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkind\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"getitem\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4403\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4404\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtz\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"tz\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4405\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4406\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mholds_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_boolean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: '도로명주소'"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import pyspark\n",
"from tqdm import tqdm\n",
"\n",
"basic_folder = ''\n",
"file_name = basic_folder + 'elementary_middle_schools_location.csv'\n",
"\n",
"df2 =pd.read_csv(file_name)\n",
"count_row=df2.shape[0]\n",
"\n",
"elementary=[]\n",
"middle=[]\n",
"\n",
"def type_of_school():\n",
" for x in tqdm(range(count_row)):\n",
" type=df2.loc[x]['학교급구분']\n",
" if(type=='중학교'):\n",
" row=[df2.loc[x]['학교명'],df2.loc[x]['소재지도로명주소'],df2.loc[x]['위도'],df2.loc[x]['경도']]\n",
" middle.append(row)\n",
" elif(type=='초등학교'):\n",
" row=[df2.loc[x]['학교명'],df2.loc[x]['소재지도로명주소'],df2.loc[x]['위도'],df2.loc[x]['경도']]\n",
" elementary.append(row)\n",
"\n",
"type_of_school()\n",
"columns=['school_name','school_addr','latitude','longitude']\n",
"middle_df=pd.DataFrame(middle,columns=columns)\n",
"middle_df.to_csv(r'middle_school.csv')\n",
"\n",
"elem_df=pd.DataFrame(elementary,columns=columns)\n",
"elem_df.to_csv(r'elementary_school.csv')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 2360/2360 [18:07<00:00, 2.17it/s]\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import pyspark\n",
"from tqdm import tqdm\n",
"\n",
"basic_folder = ''\n",
"file_name = basic_folder + 'middle_school.csv'\n",
"file_name2 = basic_folder + 'middle_school_stu_num.csv'\n",
"\n",
"middle_df =pd.read_csv(file_name)\n",
"middle_stu_num_df=pd.read_csv(file_name2)\n",
"\n",
"count_row=middle_df.shape[0]\n",
"count_row2=middle_stu_num_df.shape[0]\n",
"\n",
"middle_arr=[]\n",
"\n",
"def find_middle_student_num():\n",
" for x in tqdm(range(count_row)):\n",
" name=middle_df.loc[x]['school_name']\n",
" for y in range(count_row2):\n",
" if name == middle_stu_num_df.loc[y]['school_name']:\n",
" row=[middle_df.loc[x]['school_name'],middle_df.loc[x]['school_addr'],middle_df.loc[x]['latitude'],middle_df.loc[x]['longitude'],\n",
" middle_stu_num_df.loc[y]['1_stu_num'],middle_stu_num_df.loc[y]['2_stu_num'],middle_stu_num_df.loc[y]['3_stu_num']]\n",
" middle_arr.append(row)\n",
" \n",
"\n",
"find_middle_student_num()\n",
"columns=['school_name','school_addr','latitude','longitude','1_stu_num','2_stu_num','3_stu_num']\n",
"final_middle_df=pd.DataFrame(middle_arr,columns=columns)\n",
"final_middle_df.to_csv(r'final_middle_school.csv')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"0it [00:00, ?it/s]\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import pyspark\n",
"from tqdm import tqdm\n",
"\n",
"basic_folder = ''\n",
"file_name = basic_folder + 'elementary_school.csv'\n",
"file_name2 = basic_folder + 'elementary_school_stu_num.csv'\n",
"\n",
"ele_df =pd.read_csv(file_name)\n",
"ele_stu_num_df=pd.read_csv(file_name2)\n",
"\n",
"count_row=ele_df.shape[0]\n",
"count_row2=ele_stu_num_df.shape[0]\n",
"\n",
"ele_arr=[]\n",
"\n",
"def find_ele_student_num():\n",
" for x in tqdm(range(count_row)):\n",
" name=ele_df.loc[x]['school_name']\n",
" for y in range(count_row2):\n",
" if name == ele_stu_num_df.loc[y]['school_name']:\n",
" row=[ele_df.loc[x]['school_name'],ele_df.loc[x]['school_addr'],ele_df.loc[x]['latitude'],ele_df.loc[x]['longitude'],\n",
" ele_stu_num_df.loc[y]['1_stu_num'],ele_stu_num_df.loc[y]['2_stu_num'],ele_stu_num_df.loc[y]['3_stu_num'],\n",
" ele_stu_num_df.loc[y]['4_stu_num'],ele_stu_num_df.loc[y]['5_stu_num'],ele_stu_num_df.loc[y]['6_stu_num']]\n",
" ele_arr.append(row)\n",
" \n",
"find_ele_student_num()\n",
"columns=['school_name','school_addr','latitude','longitude','1_stu_num','2_stu_num','3_stu_num','4_stu_num','5_stu_num','6_stu_num']\n",
"final_ele_df=pd.DataFrame(ele_arr,columns=columns)\n",
"final_ele_df.to_csv(r'final_ele_school.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import pyspark\n",
"from tqdm import tqdm\n",
"\n",
"basic_folder = ''\n",
"file_name = basic_folder + 'high_school.csv'\n",
"file_name2 = basic_folder + 'high_school_stu_num.csv'\n",
"\n",
"middle_df =pd.read_csv(file_name)\n",
"middle_stu_num_df=pd.read_csv(file_name2)\n",
"\n",
"count_row=middle_df.shape[0]\n",
"count_row2=middle_stu_num_df.shape[0]\n",
"\n",
"high_arr=[]\n",
"\n",
"def find_high_student_num():\n",
" for x in tqdm(range(count_row)):\n",
" name=middle_df.loc[x]['school_name']\n",
" for y in range(count_row2):\n",
" if name == middle_stu_num_df.loc[y]['school_name']:\n",
" row=[middle_df.loc[x]['school_name'],middle_df.loc[x]['school_addr'],middle_df.loc[x]['latitude'],middle_df.loc[x]['longitude'],\n",
" middle_stu_num_df.loc[y]['1_stu_num'],middle_stu_num_df.loc[y]['2_stu_num'],middle_stu_num_df.loc[y]['3_stu_num']]\n",
" high_arr.append(row)\n",
" \n",
"find_high_student_num()\n",
"columns=['school_name','school_addr','latitude','longitude','1_stu_num','2_stu_num','3_stu_num']\n",
"final_high_df=pd.DataFrame(middle_arr,columns=columns)\n",
"final_high_df.to_csv(r'final_high_school.csv')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}