노현종

VulnAbstractCrawler

This diff is collapsed. Click to expand it.
...@@ -38,8 +38,13 @@ ...@@ -38,8 +38,13 @@
38 <Reference Include="LibGit2Sharp, Version=0.25.0.0, Culture=neutral, PublicKeyToken=7cbde695407f0333, processorArchitecture=MSIL"> 38 <Reference Include="LibGit2Sharp, Version=0.25.0.0, Culture=neutral, PublicKeyToken=7cbde695407f0333, processorArchitecture=MSIL">
39 <HintPath>..\packages\LibGit2Sharp.0.25.0\lib\netstandard2.0\LibGit2Sharp.dll</HintPath> 39 <HintPath>..\packages\LibGit2Sharp.0.25.0\lib\netstandard2.0\LibGit2Sharp.dll</HintPath>
40 </Reference> 40 </Reference>
41 + <Reference Include="MySql.Data, Version=8.0.10.0, Culture=neutral, PublicKeyToken=c5687fc88969c44d, processorArchitecture=MSIL" />
41 <Reference Include="System" /> 42 <Reference Include="System" />
42 <Reference Include="System.Core" /> 43 <Reference Include="System.Core" />
44 + <Reference Include="System.ValueTuple, Version=4.0.2.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
45 + <HintPath>..\packages\System.ValueTuple.4.4.0\lib\net461\System.ValueTuple.dll</HintPath>
46 + <Private>True</Private>
47 + </Reference>
43 <Reference Include="System.Xml.Linq" /> 48 <Reference Include="System.Xml.Linq" />
44 <Reference Include="System.Data.DataSetExtensions" /> 49 <Reference Include="System.Data.DataSetExtensions" />
45 <Reference Include="Microsoft.CSharp" /> 50 <Reference Include="Microsoft.CSharp" />
...@@ -50,6 +55,7 @@ ...@@ -50,6 +55,7 @@
50 <ItemGroup> 55 <ItemGroup>
51 <Compile Include="Program.cs" /> 56 <Compile Include="Program.cs" />
52 <Compile Include="Properties\AssemblyInfo.cs" /> 57 <Compile Include="Properties\AssemblyInfo.cs" />
58 + <Compile Include="VulnPython.cs" />
53 </ItemGroup> 59 </ItemGroup>
54 <ItemGroup> 60 <ItemGroup>
55 <None Include="App.config" /> 61 <None Include="App.config" />
......
1 +using LibGit2Sharp;
2 +using System;
3 +using System.Collections.Generic;
4 +using System.IO;
5 +using System.Linq;
6 +using System.Security.Cryptography;
7 +using System.Text;
8 +using System.Text.RegularExpressions;
9 +using System.Threading.Tasks;
10 +
11 +namespace VulnCrawler
12 +{
/// <summary>
/// Abstract base class for vulnerability crawlers. It opens a Git repository,
/// collects the commits whose message mentions a CVE identifier, and declares
/// the language-specific operations derived crawlers must provide.
/// </summary>
public abstract class VulnAbstractCrawler : IDisposable {
    // Set once Dispose has run so repeated calls are harmless.
    private bool _disposed;

    /// <summary>
    /// Opens the repository located at <paramref name="path"/> and immediately
    /// searches it for matching commits (see <see cref="SearchCommits"/>).
    /// </summary>
    /// <param name="path">Path passed to the <see cref="LibGit2Sharp.Repository"/> constructor.</param>
    public VulnAbstractCrawler(string path) {
        Repository = new Repository(path);
        try {
            // NOTE: SearchCommits is virtual and runs before any derived-class
            // constructor body, so overrides must not rely on derived state.
            Commits = SearchCommits();
        } catch {
            // Do not leak the repository handle when the initial search fails.
            Repository.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Releases the underlying repository.
    /// </summary>
    /// <remarks>
    /// This replaces the former finalizer, which dereferenced <see cref="Repository"/>
    /// on the finalizer thread. That crashed the process with a
    /// <see cref="NullReferenceException"/> whenever the constructor had thrown
    /// before the property was assigned, and touching managed objects during
    /// finalization is unsafe in general.
    /// </remarks>
    public void Dispose() {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Core of the dispose pattern; derived classes may override it to release
    /// their own resources.
    /// </summary>
    /// <param name="disposing">True when called from <see cref="Dispose()"/>.</param>
    protected virtual void Dispose(bool disposing) {
        if (_disposed) {
            return;
        }
        if (disposing) {
            Repository?.Dispose();
        }
        _disposed = true;
    }

    // Names of the regex capture groups used for a hunk header of the form
    // @@ -oldStart,oldLines +newStart,newLines @@ MethodName():
    public static string OldStart => "oldStart";
    public static string OldLines => "oldLines";
    public static string NewStart => "newStart";
    public static string NewLines => "newLines";
    public static string MethodName => "methodName";


    /// <summary>
    /// The repository being crawled.
    /// </summary>
    public Repository Repository { get; private set; }

    /// <summary>
    /// Commits returned by <see cref="SearchCommits"/> at construction time.
    /// </summary>
    public IEnumerable<Commit> Commits { get; private set; }

    /// <summary>
    /// Regular expression searched for in commit messages.
    /// </summary>
    protected string SearchKeyword => @"CVE-20\d\d-\d{4}";

    /// <summary>
    /// Regular expression pattern used to locate functions in patch text.
    /// </summary>
    protected abstract string RegexFuncPattern { get; }

    /// <summary>
    /// File extension handled by the concrete crawler (for example ".py").
    /// </summary>
    protected abstract string Extension { get; }

    /// <summary>
    /// Selects the patch entries the concrete crawler is interested in.
    /// </summary>
    /// <param name="patch">Patch to filter.</param>
    /// <returns>The selected entries.</returns>
    public abstract IEnumerable<PatchEntryChanges> GetPatchEntryChanges(Patch patch);

    /// <summary>
    /// Finds hunk headers of the form "@@ -\d,\d +\d,\d @@ MethodName():" and
    /// returns them with the named groups OldStart, OldLines, NewStart,
    /// NewLines and MethodName.
    /// </summary>
    /// <param name="patchCode">Patch text to search.</param>
    /// <returns>The collection of regex matches.</returns>
    public abstract MatchCollection GetMatches(string patchCode);

    /// <summary>
    /// Extracts the original (pre-patch) function from a file stream.
    /// </summary>
    /// <param name="oldStream">Stream over the file contents.</param>
    /// <param name="methodName">Name of the method to look for.</param>
    /// <returns>The function as text.</returns>
    protected abstract string GetOriginalFunc(Stream oldStream, string methodName);

    /// <summary>
    /// Produces the original function text together with its hash.
    /// </summary>
    /// <param name="oldStream">Stream over the file contents.</param>
    /// <param name="methodName">Name of the method to look for.</param>
    /// <returns>A tuple of the function text and its hash.</returns>
    public abstract (string originalFunc, string hash) GetPatchResult(Stream oldStream, string methodName);

    /// <summary>
    /// Removes comments from the given text.
    /// </summary>
    /// <param name="original">Text to strip.</param>
    /// <returns>The resulting text.</returns>
    public abstract string RemoveComment(string original);

    /// <summary>
    /// Searches the repository for commits whose message contains a match of
    /// <see cref="SearchKeyword"/> (case-insensitive, anywhere in the message).
    /// </summary>
    /// <returns>The matching commits, materialised into a list.</returns>
    public virtual IEnumerable<Commit> SearchCommits() {
        return Repository.Commits
                         .Where(c => Regex.IsMatch(c.Message, SearchKeyword, RegexOptions.IgnoreCase))
                         .ToList();
    }

    /// <summary>
    /// Computes the MD5 digest of a string as upper-case hexadecimal.
    /// </summary>
    /// <remarks>
    /// The text is encoded as UTF-8. For pure-ASCII input this yields the same
    /// bytes (and therefore the same hash) as the previous ASCII encoding, but
    /// non-ASCII characters are no longer all collapsed to '?', which made
    /// distinct inputs hash identically.
    /// </remarks>
    /// <param name="str">Input text.</param>
    /// <returns>The digest as a hexadecimal string.</returns>
    protected static string MD5HashFunc(string str) {
        byte[] input = Encoding.UTF8.GetBytes(str);
        byte[] digest;
        // The provider is IDisposable; release it deterministically.
        using (var md5 = new MD5CryptoServiceProvider()) {
            digest = md5.ComputeHash(input);
        }

        var hex = new StringBuilder(digest.Length * 2);
        foreach (byte b in digest) {
            hex.Append(b.ToString("X2"));
        }
        return hex.ToString();
    }

}
113 +
/// <summary>
/// Crawler for C sources. Only <see cref="Extension"/> is implemented so far;
/// every other member throws <see cref="NotImplementedException"/>.
/// </summary>
public class VulnC : VulnAbstractCrawler
{
    /// <summary>
    /// Forwards <paramref name="path"/> to the base constructor.
    /// </summary>
    /// <param name="path">Repository path.</param>
    public VulnC(string path) : base(path) { }

    /// <summary>File extension handled by this crawler.</summary>
    protected override string Extension => ".c";

    /// <summary>Not implemented yet.</summary>
    protected override string RegexFuncPattern => throw new NotImplementedException();

    /// <summary>Not implemented yet.</summary>
    public override MatchCollection GetMatches(string patchCode)
        => throw new NotImplementedException();

    /// <summary>Not implemented yet.</summary>
    public override IEnumerable<PatchEntryChanges> GetPatchEntryChanges(Patch patch)
        => throw new NotImplementedException();

    /// <summary>Not implemented yet.</summary>
    public override (string originalFunc, string hash) GetPatchResult(Stream oldStream, string methodName)
        => throw new NotImplementedException();

    /// <summary>Not implemented yet.</summary>
    public override string RemoveComment(string original)
        => throw new NotImplementedException();

    /// <summary>Not implemented yet.</summary>
    protected override string GetOriginalFunc(Stream oldStream, string methodName)
        => throw new NotImplementedException();
}
/// <summary>
/// Crawler for Python sources: locates "def" hunk headers in patches, extracts
/// the pre-patch function text, strips comments and hashes the result.
/// </summary>
public class VulnPython : VulnAbstractCrawler
{
    /// <summary>
    /// Forwards <paramref name="path"/> to the base constructor.
    /// </summary>
    /// <param name="path">Repository path.</param>
    public VulnPython(string path) : base(path) {
    }

    /// <summary>File extension handled by this crawler.</summary>
    protected override string Extension => ".py";

    /// <summary>
    /// Matches a hunk header followed by "def name", capturing the four line
    /// numbers and the function name into the named groups declared on the base class.
    /// </summary>
    protected override string RegexFuncPattern => $@"@@ \-(?<{OldStart}>\d+),(?<{OldLines}>\d+) \+(?<{NewStart}>\d+),(?<{NewLines}>\d+) @@ def (?<{MethodName}>\w+)";

    /// <summary>
    /// Returns every match of <see cref="RegexFuncPattern"/> in the patch text.
    /// </summary>
    /// <param name="patchCode">Patch text to search.</param>
    /// <returns>The collection of regex matches.</returns>
    public override MatchCollection GetMatches(string patchCode) {
        return Regex.Matches(patchCode, RegexFuncPattern);
    }

    /// <summary>
    /// Reads <paramref name="oldStream"/> line by line and returns the first
    /// definition of <paramref name="methodName"/>: its "def" line followed by
    /// the lines of its body, without blank lines and "#" comment lines.
    /// </summary>
    /// <remarks>
    /// The body ends at the first non-blank, non-comment line that is not
    /// indented deeper than the "def" line. Previously the end of the function
    /// was never detected, so indented lines of all later definitions were
    /// appended too. Limitation: a multi-line string whose content is dedented
    /// to the "def" column or further is treated as the end of the function.
    /// The stream is closed when this method returns.
    /// </remarks>
    /// <param name="oldStream">Stream over the pre-patch file contents.</param>
    /// <param name="methodName">Name of the function to extract.</param>
    /// <returns>The extracted text, or an empty string when no definition is found.</returns>
    protected override string GetOriginalFunc(Stream oldStream, string methodName) {
        // Anchor on the start of the line and escape the name so it is matched literally;
        // the closing parenthesis is not required so multi-line signatures are found too.
        var defPattern = new Regex($@"^\s*(?:async\s+)?def\s+{Regex.Escape(methodName)}\s*\(");
        var oldBuilder = new StringBuilder();

        using (var reader = new StreamReader(oldStream)) {
            // Indentation width of the "def" line; -1 until the definition is found.
            int defIndent = -1;
            string line;
            while ((line = reader.ReadLine()) != null) {
                if (defIndent < 0) {
                    if (defPattern.IsMatch(line)) {
                        defIndent = line.Length - line.TrimStart().Length;
                        oldBuilder.AppendLine(line);
                    }
                    continue;
                }

                // Blank lines carry no code and never end a block.
                if (string.IsNullOrWhiteSpace(line)) {
                    continue;
                }

                string trimmed = line.TrimStart();
                // A line starting with '#' is a comment: drop it, whatever its indentation.
                if (trimmed.StartsWith("#", StringComparison.Ordinal)) {
                    continue;
                }

                int indent = line.Length - trimmed.Length;
                if (indent <= defIndent) {
                    // Back at (or left of) the "def" column: the function is over.
                    break;
                }
                oldBuilder.AppendLine(line);
            }
        }
        return oldBuilder.ToString();
    }

    /// <summary>
    /// Returns the entries of <paramref name="patch"/> whose path ends with
    /// <see cref="Extension"/> (ordinal, case-sensitive comparison).
    /// </summary>
    /// <param name="patch">Patch to filter.</param>
    /// <returns>The matching entries, materialised into a list.</returns>
    public override IEnumerable<PatchEntryChanges> GetPatchEntryChanges(Patch patch) {
        return patch.Where(e => e.Path.EndsWith(Extension, StringComparison.Ordinal)).ToList();
    }

    /// <summary>
    /// Joins the text into a single line and removes triple-quoted strings
    /// (both """...""" and '''...''').
    /// </summary>
    /// <remarks>
    /// Each triple-quoted string is matched lazily, so code located between two
    /// separate strings is preserved; the previous greedy pattern deleted
    /// everything from the first to the last delimiter. Line breaks are removed
    /// in both CRLF and LF form, independent of the host platform.
    /// </remarks>
    /// <param name="original">Text to strip.</param>
    /// <returns>The text without line breaks and triple-quoted strings.</returns>
    public override string RemoveComment(string original) {
        string flattened = Regex.Replace(original, @"\r?\n", "");
        return Regex.Replace(flattened, "\"\"\".*?\"\"\"|'''.*?'''", "");
    }

    /// <summary>
    /// Extracts the pre-patch function, strips its comments, writes the result
    /// to the console and hashes it with MD5.
    /// </summary>
    /// <param name="stream">Stream over the pre-patch file contents.</param>
    /// <param name="methodName">Name of the function to extract.</param>
    /// <returns>The normalised function text and its MD5 hash.</returns>
    public override (string originalFunc, string hash) GetPatchResult(Stream stream, string methodName) {
        // Get the original function as it was before the patch...
        string func = GetOriginalFunc(stream, methodName);
        // ...strip the comments...
        func = RemoveComment(func);
        Console.WriteLine(func);
        // ...and hash what is left.
        string md5 = MD5HashFunc(func);
        return (func, md5);
    }
}
226 +}
...@@ -2,4 +2,5 @@ ...@@ -2,4 +2,5 @@
2 <packages> 2 <packages>
3 <package id="LibGit2Sharp" version="0.25.0" targetFramework="net461" /> 3 <package id="LibGit2Sharp" version="0.25.0" targetFramework="net461" />
4 <package id="LibGit2Sharp.NativeBinaries" version="1.0.210" targetFramework="net461" /> 4 <package id="LibGit2Sharp.NativeBinaries" version="1.0.210" targetFramework="net461" />
5 + <package id="System.ValueTuple" version="4.4.0" targetFramework="net461" />
5 </packages> 6 </packages>
...\ No newline at end of file ...\ No newline at end of file
......