노현종

VulnAbstractCrawler

This diff is collapsed. Click to expand it.
......@@ -38,8 +38,13 @@
<Reference Include="LibGit2Sharp, Version=0.25.0.0, Culture=neutral, PublicKeyToken=7cbde695407f0333, processorArchitecture=MSIL">
<HintPath>..\packages\LibGit2Sharp.0.25.0\lib\netstandard2.0\LibGit2Sharp.dll</HintPath>
</Reference>
<Reference Include="MySql.Data, Version=8.0.10.0, Culture=neutral, PublicKeyToken=c5687fc88969c44d, processorArchitecture=MSIL" />
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.ValueTuple, Version=4.0.2.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
<HintPath>..\packages\System.ValueTuple.4.4.0\lib\net461\System.ValueTuple.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
......@@ -50,6 +55,7 @@
<ItemGroup>
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="VulnPython.cs" />
</ItemGroup>
<ItemGroup>
<None Include="App.config" />
......
using LibGit2Sharp;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace VulnCrawler
{
// Abstract base class for the language-specific vulnerability crawlers.
public abstract class VulnAbstractCrawler : IDisposable {
    // Set once Dispose has run so that repeated calls are harmless.
    private bool _disposed;

    /// <summary>
    /// Constructor.
    /// Opens the repository located at <paramref name="path"/> and
    /// stores the commits returned by <see cref="SearchCommits"/>.
    /// </summary>
    /// <param name="path">Path handed to the LibGit2Sharp Repository constructor.</param>
    public VulnAbstractCrawler(string path) {
        Repository = new Repository(path);
        try {
            // NOTE: SearchCommits is virtual; an override executes before the
            // derived class' constructor body has run.
            Commits = SearchCommits();
        } catch {
            // Do not leave the repository open when the search fails.
            Repository.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Releases the underlying repository.
    /// </summary>
    /// <remarks>
    /// This replaces the former finalizer: a finalizer must not touch other
    /// managed objects (finalization order is undefined) and Repository is
    /// null when the constructor throws, which made the finalizer itself fail.
    /// </remarks>
    public void Dispose() {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Core of the dispose pattern; derived classes may override it to release
    /// their own resources and must call the base implementation.
    /// </summary>
    /// <param name="disposing">True when called from <see cref="Dispose()"/>.</param>
    protected virtual void Dispose(bool disposing) {
        if (_disposed) {
            return;
        }
        if (disposing) {
            Repository?.Dispose();
        }
        _disposed = true;
    }

    // Names of the regex capture groups.
    // @@ -oldStart,oldLines +newStart,newLines @@ MethodName():
    public static string OldStart => "oldStart";
    public static string OldLines => "oldLines";
    public static string NewStart => "newStart";
    public static string NewLines => "newLines";
    public static string MethodName => "methodName";

    /// <summary>
    /// The repository opened by the constructor.
    /// </summary>
    public Repository Repository { get; private set; }

    /// <summary>
    /// The commits found by <see cref="SearchCommits"/> during construction.
    /// </summary>
    public IEnumerable<Commit> Commits { get; private set; }

    /// <summary>
    /// Regular expression searched for in commit messages.
    /// </summary>
    protected string SearchKeyword => @"CVE-20\d\d-\d{4}";

    /// <summary>
    /// Regular expression pattern used to locate functions in patch text.
    /// </summary>
    protected abstract string RegexFuncPattern { get; }

    /// <summary>
    /// File extension handled by the concrete crawler (the subclasses in this
    /// file return values such as ".c" and ".py").
    /// </summary>
    protected abstract string Extension { get; }

    /// <summary>
    /// Selects the entries of <paramref name="patch"/> the crawler is interested in.
    /// </summary>
    /// <param name="patch">Patch to filter.</param>
    /// <returns>The selected patch entries.</returns>
    public abstract IEnumerable<PatchEntryChanges> GetPatchEntryChanges(Patch patch);

    /// <summary>
    /// Uses a regular expression to find patterns such as
    /// "@@ -\d,\d +\d,\d @@ MethodName():" and returns them grouped
    /// (OldStart, OldLines, NewStart, NewLines, MethodName).
    /// </summary>
    /// <param name="patchCode">Patch text to search.</param>
    /// <returns>The collection of regex matches.</returns>
    public abstract MatchCollection GetMatches(string patchCode);

    /// <summary>
    /// Extracts the original function from a file stream.
    /// </summary>
    /// <param name="oldStream">File stream to read.</param>
    /// <param name="methodName">Name of the method to look for.</param>
    /// <returns>The function text.</returns>
    protected abstract string GetOriginalFunc(Stream oldStream, string methodName);

    /// <summary>
    /// Produces the original function text together with a hash of it.
    /// </summary>
    /// <param name="oldStream">File stream to read.</param>
    /// <param name="methodName">Name of the method to look for.</param>
    /// <returns>The function text and its hash.</returns>
    public abstract (string originalFunc, string hash) GetPatchResult(Stream oldStream, string methodName);

    /// <summary>
    /// Removes comments.
    /// </summary>
    /// <param name="original">Text to strip.</param>
    /// <returns>The resulting text.</returns>
    public abstract string RemoveComment(string original);

    /// <summary>
    /// Searches the repository's commits with a regular expression
    /// (<see cref="SearchKeyword"/>, case-insensitive).
    /// </summary>
    /// <returns>The matching commits as a materialized list.</returns>
    public virtual IEnumerable<Commit> SearchCommits() {
        // Keep only commits whose message contains a match for SearchKeyword
        // anywhere in the text (the pattern is not anchored).
        return Repository.Commits
            .Where(c => Regex.IsMatch(c.Message, SearchKeyword, RegexOptions.IgnoreCase))
            .ToList();
    }

    /// <summary>
    /// Computes the MD5 digest of a string.
    /// </summary>
    /// <param name="str">Input string.</param>
    /// <returns>The digest as 32 upper-case hexadecimal characters.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
    protected static string MD5HashFunc(string str) {
        if (str == null) {
            throw new ArgumentNullException(nameof(str));
        }
        // UTF-8 instead of ASCII: ASCII encodes every non-ASCII character as '?',
        // which made different inputs hash to the same value. Pure-ASCII input
        // yields the same bytes, and therefore the same digest, as before.
        byte[] input = Encoding.UTF8.GetBytes(str);
        using (MD5 md5 = MD5.Create()) {
            byte[] digest = md5.ComputeHash(input);
            var hex = new StringBuilder(digest.Length * 2);
            foreach (byte b in digest) {
                hex.Append(b.ToString("X2"));
            }
            return hex.ToString();
        }
    }
}
/// <summary>
/// Crawler for C sources. Only the file extension is provided so far;
/// every other member throws <see cref="NotImplementedException"/>.
/// </summary>
public class VulnC : VulnAbstractCrawler
{
    /// <summary>
    /// Forwards <paramref name="path"/> to the base constructor.
    /// </summary>
    /// <param name="path">Repository path.</param>
    public VulnC(string path) : base(path) {
    }

    /// <summary>File extension handled by this crawler.</summary>
    protected override string Extension => ".c";

    /// <summary>Not implemented.</summary>
    protected override string RegexFuncPattern => throw new NotImplementedException();

    /// <summary>Not implemented.</summary>
    protected override string GetOriginalFunc(Stream oldStream, string methodName)
        => throw new NotImplementedException();

    /// <summary>Not implemented.</summary>
    public override string RemoveComment(string original)
        => throw new NotImplementedException();

    /// <summary>Not implemented.</summary>
    public override (string originalFunc, string hash) GetPatchResult(Stream oldStream, string methodName)
        => throw new NotImplementedException();

    /// <summary>Not implemented.</summary>
    public override IEnumerable<PatchEntryChanges> GetPatchEntryChanges(Patch patch)
        => throw new NotImplementedException();

    /// <summary>Not implemented.</summary>
    public override MatchCollection GetMatches(string patchCode)
        => throw new NotImplementedException();
}
/// <summary>
/// Python crawler.
/// </summary>
public class VulnPython : VulnAbstractCrawler
{
    // Triple-quoted blocks, non-greedy so that each block is removed on its own
    // instead of everything between the first and the last delimiter.
    private static readonly Regex DocstringPattern =
        new Regex("\"\"\".*?\"\"\"|'''.*?'''", RegexOptions.Singleline);

    /// <summary>
    /// Forwards <paramref name="path"/> to the base constructor.
    /// </summary>
    /// <param name="path">Repository path.</param>
    public VulnPython(string path) : base(path) {
    }

    /// <summary>File extension handled by this crawler.</summary>
    protected override string Extension => ".py";

    /// <summary>
    /// Matches a hunk header followed by a Python function definition, e.g.
    /// "@@ -10,7 +10,8 @@ def name", capturing the four numbers and the name
    /// in the groups OldStart, OldLines, NewStart, NewLines and MethodName.
    /// </summary>
    protected override string RegexFuncPattern => $@"@@ \-(?<{OldStart}>\d+),(?<{OldLines}>\d+) \+(?<{NewStart}>\d+),(?<{NewLines}>\d+) @@ def (?<{MethodName}>\w+)";

    /// <summary>
    /// Returns every match of <see cref="RegexFuncPattern"/> in the patch text.
    /// </summary>
    /// <param name="patchCode">Patch text to search.</param>
    /// <returns>The collection of regex matches.</returns>
    public override MatchCollection GetMatches(string patchCode) {
        return Regex.Matches(patchCode, RegexFuncPattern);
    }

    /// <summary>
    /// Reads the stream line by line and returns the first function named
    /// <paramref name="methodName"/>: its "def" line plus the following lines
    /// that are indented deeper than that "def". Blank lines and lines that
    /// start with '#' are left out. The stream is closed when reading finishes.
    /// </summary>
    /// <param name="oldStream">Stream holding the Python source.</param>
    /// <param name="methodName">Name of the function to extract.</param>
    /// <returns>The function text, or an empty string when no definition is found.</returns>
    protected override string GetOriginalFunc(Stream oldStream, string methodName) {
        // Anchored and escaped so the name can only match a real definition
        // line. As before, the closing parenthesis must be on the same line.
        var defPattern = new Regex(
            $@"^(?<indent>[ \t]*)(?:async\s+)?def\s+{Regex.Escape(methodName ?? string.Empty)}\s*\(.*\)");
        var extracted = new StringBuilder();
        using (var reader = new StreamReader(oldStream)) {
            int defIndent = -1; // -1 until the definition line has been seen
            string line;
            while ((line = reader.ReadLine()) != null) {
                if (defIndent < 0) {
                    Match definition = defPattern.Match(line);
                    if (definition.Success) {
                        defIndent = definition.Groups["indent"].Length;
                        extracted.AppendLine(line);
                    }
                    continue;
                }
                string content = line.TrimStart();
                if (content.Trim().Length == 0) {
                    // Blank lines do not end the function and are not kept.
                    continue;
                }
                // A line starting with '#' is a comment, so drop it.
                if (content.StartsWith("#", StringComparison.Ordinal)) {
                    continue;
                }
                int indent = line.Length - content.Length;
                if (indent <= defIndent) {
                    // First code line at or left of the "def" column: the
                    // function is over, so later functions must not be appended.
                    break;
                }
                extracted.AppendLine(line);
            }
        }
        return extracted.ToString();
    }

    /// <summary>
    /// Keeps the patch entries whose path ends with <see cref="Extension"/>.
    /// </summary>
    /// <param name="patch">Patch to filter.</param>
    /// <returns>The matching entries as a materialized list.</returns>
    public override IEnumerable<PatchEntryChanges> GetPatchEntryChanges(Patch patch) {
        return patch.Where(e => e.Path.EndsWith(Extension, StringComparison.Ordinal)).ToList();
    }

    /// <summary>
    /// Removes every Environment.NewLine sequence and then every
    /// triple-quoted block ("""...""" or '''...''') from the text.
    /// </summary>
    /// <param name="original">Text to strip.</param>
    /// <returns>The text on a single line without triple-quoted blocks.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="original"/> is null.</exception>
    public override string RemoveComment(string original) {
        if (original == null) {
            throw new ArgumentNullException(nameof(original));
        }
        string flattened = original.Replace(Environment.NewLine, "");
        return DocstringPattern.Replace(flattened, "");
    }

    /// <summary>
    /// Extracts the function, strips its comments, prints the result to the
    /// console and hashes it.
    /// </summary>
    /// <param name="stream">Stream holding the pre-patch Python source.</param>
    /// <param name="methodName">Name of the function to extract.</param>
    /// <returns>The stripped function text and its MD5 hash.</returns>
    public override (string originalFunc, string hash) GetPatchResult(Stream stream, string methodName) {
        // Get the original (pre-patch) function,
        string func = GetOriginalFunc(stream, methodName);
        // strip the comments,
        func = RemoveComment(func);
        Console.WriteLine(func);
        // and hash it.
        string md5 = MD5HashFunc(func);
        return (func, md5);
    }
}
}
......@@ -2,4 +2,5 @@
<packages>
<package id="LibGit2Sharp" version="0.25.0" targetFramework="net461" />
<package id="LibGit2Sharp.NativeBinaries" version="1.0.210" targetFramework="net461" />
<package id="System.ValueTuple" version="4.4.0" targetFramework="net461" />
</packages>
\ No newline at end of file
......