GyuhoLee

[Update] 변환된 xml을 문장별로 string list로 파싱

...@@ -20,14 +20,8 @@ ...@@ -20,14 +20,8 @@
20 </component> 20 </component>
21 <component name="ChangeListManager"> 21 <component name="ChangeListManager">
22 <list default="true" id="b9decb0c-dc9e-4239-bdad-09ea8dd5179d" name="Default Changelist" comment=""> 22 <list default="true" id="b9decb0c-dc9e-4239-bdad-09ea8dd5179d" name="Default Changelist" comment="">
23 - <change beforePath="$PROJECT_DIR$/.idea/.gitignore" beforeDir="false" /> 23 + <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
24 - <change beforePath="$PROJECT_DIR$/.idea/inspectionProfiles/profiles_settings.xml" beforeDir="false" />
25 - <change beforePath="$PROJECT_DIR$/.idea/misc.xml" beforeDir="false" />
26 - <change beforePath="$PROJECT_DIR$/.idea/modules.xml" beforeDir="false" />
27 - <change beforePath="$PROJECT_DIR$/.idea/src.iml" beforeDir="false" />
28 - <change beforePath="$PROJECT_DIR$/.idea/vcs.xml" beforeDir="false" />
29 <change beforePath="$PROJECT_DIR$/subtitle.py" beforeDir="false" afterPath="$PROJECT_DIR$/subtitle.py" afterDir="false" /> 24 <change beforePath="$PROJECT_DIR$/subtitle.py" beforeDir="false" afterPath="$PROJECT_DIR$/subtitle.py" afterDir="false" />
30 - <change beforePath="$PROJECT_DIR$/../캡스톤디자인_3주차_주간보고서.hwp" beforeDir="false" />
31 </list> 25 </list>
32 <option name="SHOW_DIALOG" value="false" /> 26 <option name="SHOW_DIALOG" value="false" />
33 <option name="HIGHLIGHT_CONFLICTS" value="true" /> 27 <option name="HIGHLIGHT_CONFLICTS" value="true" />
...@@ -101,22 +95,22 @@ ...@@ -101,22 +95,22 @@
101 </option> 95 </option>
102 </component> 96 </component>
103 <component name="WindowStateProjectService"> 97 <component name="WindowStateProjectService">
104 - <state width="1899" height="282" key="GridCell.Tab.0.bottom" timestamp="1604304028779"> 98 + <state width="1899" height="282" key="GridCell.Tab.0.bottom" timestamp="1604306110978">
105 <screen x="1920" y="0" width="1920" height="1040" /> 99 <screen x="1920" y="0" width="1920" height="1040" />
106 </state> 100 </state>
107 - <state width="1899" height="282" key="GridCell.Tab.0.bottom/0.0.1920.1040/1920.0.1920.1040@1920.0.1920.1040" timestamp="1604304028779" /> 101 + <state width="1899" height="282" key="GridCell.Tab.0.bottom/0.0.1920.1040/1920.0.1920.1040@1920.0.1920.1040" timestamp="1604306110978" />
108 - <state width="1899" height="282" key="GridCell.Tab.0.center" timestamp="1604304028779"> 102 + <state width="1899" height="282" key="GridCell.Tab.0.center" timestamp="1604306110978">
109 <screen x="1920" y="0" width="1920" height="1040" /> 103 <screen x="1920" y="0" width="1920" height="1040" />
110 </state> 104 </state>
111 - <state width="1899" height="282" key="GridCell.Tab.0.center/0.0.1920.1040/1920.0.1920.1040@1920.0.1920.1040" timestamp="1604304028779" /> 105 + <state width="1899" height="282" key="GridCell.Tab.0.center/0.0.1920.1040/1920.0.1920.1040@1920.0.1920.1040" timestamp="1604306110978" />
112 - <state width="1899" height="282" key="GridCell.Tab.0.left" timestamp="1604304028779"> 106 + <state width="1899" height="282" key="GridCell.Tab.0.left" timestamp="1604306110978">
113 <screen x="1920" y="0" width="1920" height="1040" /> 107 <screen x="1920" y="0" width="1920" height="1040" />
114 </state> 108 </state>
115 - <state width="1899" height="282" key="GridCell.Tab.0.left/0.0.1920.1040/1920.0.1920.1040@1920.0.1920.1040" timestamp="1604304028779" /> 109 + <state width="1899" height="282" key="GridCell.Tab.0.left/0.0.1920.1040/1920.0.1920.1040@1920.0.1920.1040" timestamp="1604306110978" />
116 - <state width="1899" height="282" key="GridCell.Tab.0.right" timestamp="1604304028779"> 110 + <state width="1899" height="282" key="GridCell.Tab.0.right" timestamp="1604306110978">
117 <screen x="1920" y="0" width="1920" height="1040" /> 111 <screen x="1920" y="0" width="1920" height="1040" />
118 </state> 112 </state>
119 - <state width="1899" height="282" key="GridCell.Tab.0.right/0.0.1920.1040/1920.0.1920.1040@1920.0.1920.1040" timestamp="1604304028779" /> 113 + <state width="1899" height="282" key="GridCell.Tab.0.right/0.0.1920.1040/1920.0.1920.1040@1920.0.1920.1040" timestamp="1604306110978" />
120 <state x="2381" y="164" key="SettingsEditor" timestamp="1604303734485"> 114 <state x="2381" y="164" key="SettingsEditor" timestamp="1604303734485">
121 <screen x="1920" y="0" width="1920" height="1040" /> 115 <screen x="1920" y="0" width="1920" height="1040" />
122 </state> 116 </state>
......
from pytube import YouTube
from xml.etree import ElementTree


def _sentences_from_root(root):
    """Return one string per <text> child of ``root``, with newlines flattened.

    Each entry is the concatenated text content of a direct ``<text>`` child,
    with every newline replaced by a single space. An element that has no
    text at all contributes an empty string instead of raising, so the
    result always has one entry per ``<text>`` element.
    """
    # itertext() yields nothing for an empty element (where .text is None)
    # and also picks up text that follows nested inline markup, so the join
    # is safe in both cases.
    return [
        "".join(node.itertext()).replace('\n', ' ')
        for node in root.findall("text")
    ]


# Fetch the captions of the YouTube URL as XML.
video_url = 'https://www.youtube.com/watch?v=ecUWKU_v318'
yt = YouTube(video_url)
title = yt.title
description = yt.description
caption = yt.captions.get_by_language_code('ko')
# NOTE(review): presumably the lookup yields None when the video has no 'ko'
# track (confirm against the installed pytube version); fail with a clear
# message rather than an AttributeError on the next line.
if caption is None:
    raise LookupError("no 'ko' caption track available for " + video_url)
caption_xml = caption.xml_captions

# Parse the XML into a list of strings, one per caption sentence.
root = ElementTree.fromstring(caption_xml)
print(root.tag, root.attrib)
sentences = _sentences_from_root(root)
print(sentences)
...\ No newline at end of file ...\ No newline at end of file
......