노현종

VulnAbstractCrawler

......@@ -15,145 +15,115 @@ namespace VulnCrawler
/// <summary>
/// Program entry point; delegates all work to <see cref="Run"/>.
/// </summary>
/// <param name="args">Command-line arguments (not read).</param>
static void Main(string[] args) => Run();
using (var r = new Repository(@"c:\test2")) {
var commits = r.Commits
.Where(c => Regex.Match(c.Message, @"CVE-20\d\d-\d{4}", RegexOptions.IgnoreCase).Success)
//.Where(c => c.Message.IndexOf("CVE-20",
//StringComparison.CurrentCultureIgnoreCase) >= 0)
.ToList();
Console.WriteLine(commits.Count);
foreach (var commit in commits) {
public static void Run() {
// Repository 폴더들이 있는 주소를 지정하면 하위 폴더 목록을 가져옴(Repository 목록)
var directorys = Directory.GetDirectories(@"c:\VulnPy");
if (directorys.Length == 0) {
Console.WriteLine("Repository 목록 찾기 실패");
return;
}
// Repository 목록 만큼 반복함.
foreach (var directory in directorys) {
var pyCrawl = new VulnPython(directory);
var commits = pyCrawl.Commits;
foreach (var commit in commits) {
// 커밋 메시지
string message = commit.Message;
Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine($"Commit Message: {message}");
Console.ResetColor();
foreach (var parent in commit.Parents) {
var patch = r.Diff.Compare<Patch>(parent.Tree, commit.Tree, new CompareOptions { });
// 부모 커밋과 현재 커밋을 Compare 하여 패치 내역을 가져옴
var patch = pyCrawl.Repository.Diff.Compare<Patch>(parent.Tree, commit.Tree);
// 패치 엔트리 파일 배열 중에 파일 확장자가 .py인 것만 가져옴
// (실질적인 코드 변경 커밋만 보기 위해서)
var entrys = pyCrawl.GetPatchEntryChanges(patch);
// 현재 커밋에 대한 패치 엔트리 배열을 출력함
PrintPatchEntrys(entrys, pyCrawl);
}
}
}
}
public static void PrintPatchEntrys(IEnumerable<PatchEntryChanges> entrys, VulnAbstractCrawler pyCrawl) {
var entrys = patch.Where(e => e.Path.EndsWith(".py"));
foreach (var entry in entrys) {
// 현재 패치 엔트리 정보 출력(추가된 줄 수, 삭제된 줄 수, 패치 이전 경로, 패치 후 경로)
Console.ForegroundColor = ConsoleColor.Blue;
Console.WriteLine($"status: {entry.Status.ToString()}");
Console.WriteLine($"added: {entry.LinesAdded.ToString()}, deleted: {entry.LinesDeleted.ToString()}");
Console.WriteLine($"old path: {entry.OldPath.ToString()}, new path: {entry.Path.ToString()}");
Console.ResetColor();
// 기존 소스코드
var oldOid = entry.OldOid;
Blob oldBlob = r.Lookup<Blob>(oldOid);
Blob oldBlob = pyCrawl.Repository.Lookup<Blob>(oldOid);
string oldContent = oldBlob.GetContentText();
// 변경된 소스코드
var newOid = entry.Oid;
Blob newBlob = r.Lookup<Blob>(newOid);
Blob newBlob = pyCrawl.Repository.Lookup<Blob>(newOid);
string newContent = newBlob.GetContentText();
// @@ -290,8 + 290,12 @@ def i
// @@ -290,8 +290,12 @@ def is_safe_url(url, host=None):
// 정규식(파이썬 함수만 걸러냄), 위 형식에서 290,8은 290은 시작줄, 8은 라인수, -는 변경전 +는 변경후
var regs = Regex.Matches(entry.Patch, @"@@ \-(?<oldStart>\d+),(?<oldLines>\d+) \+(?<newStart>\d+),(?<newLines>\d+) @@ def (?<methodName>\w+)");
if (regs.Count > 0) {
Console.BackgroundColor = ConsoleColor.DarkBlue;
Console.WriteLine($"Old Content: \n{oldContent}");
Console.ResetColor();
Console.BackgroundColor = ConsoleColor.DarkMagenta;
Console.WriteLine($"New Content: \n{newContent}");
Console.ResetColor();
Console.BackgroundColor = ConsoleColor.DarkRed;
Console.WriteLine($"Patched: \n{entry.Patch}");
Console.ResetColor();
Console.WriteLine("-----------");
Console.WriteLine(regs.Count);
}
var regs = pyCrawl.GetMatches(entry.Patch);
// 패치 전 코드 (oldContent)
// 패치 후 코드 (newContent)
// 패치 코드 (entry.Patch)
// 출력
//if (regs.Count > 0) {
// Console.BackgroundColor = ConsoleColor.DarkBlue;
// Console.WriteLine($"Old Content: \n{oldContent}");
// Console.ResetColor();
// Console.BackgroundColor = ConsoleColor.DarkMagenta;
// Console.WriteLine($"New Content: \n{newContent}");
// Console.ResetColor();
// Console.BackgroundColor = ConsoleColor.DarkRed;
// Console.WriteLine($"Patched: \n{entry.Patch}");
// Console.ResetColor();
// Console.WriteLine("-----------");
// Console.WriteLine(regs.Count);
//}
// 패치 코드에서 매칭된 파이썬 함수들로부터
// 패치 전 코드 파일(oldBlob)을 탐색하여 원본 파이썬 함수 가져오고(originalFunc)
//
foreach (var reg in regs) {
var match = reg as Match;
int.TryParse(match.Groups["oldStart"].Value, out int oldStart);
int.TryParse(match.Groups["oldLines"].Value, out int oldLines);
string methodName = match.Groups["methodName"].Value;
Console.WriteLine(match.Groups["oldStart"].Value);
Console.WriteLine(match.Groups["oldLines"].Value);
Console.WriteLine(match.Groups["newStart"].Value);
Console.WriteLine(match.Groups["newLines"].Value);
Console.WriteLine(match.Groups["methodName"].Value);
StringBuilder oldBuilder = new StringBuilder();
using (var reader = new StreamReader(oldBlob.GetContentStream())) {
int readCount = 0;
int defSpace = 0;
while (!reader.EndOfStream && readCount <= oldStart + oldLines) {
string line = reader.ReadLine();
if (defSpace > 0) {
if (line.Length < defSpace) {
continue;
}
string concat = line.Substring(0, defSpace);
if (string.IsNullOrWhiteSpace(concat)) {
string trim = line.Trim();
if (trim.StartsWith("#")) {
continue;
}
string methodName = match.Groups[VulnAbstractCrawler.MethodName].Value;
oldBuilder.Append(line);
}
else {
continue;
}
}
if (Regex.Match(line, $@"def {methodName}\(.*\)").Success) {
defSpace = line.IndexOf(methodName);
oldBuilder.Append(line);
}
string originalFunc, md5;
}
(originalFunc, md5) = pyCrawl.GetPatchResult(oldBlob.GetContentStream(),
match.Groups[VulnAbstractCrawler.MethodName].Value);
}
// 패치 전 원본 함수
Console.WriteLine($"Original Func: {originalFunc}");
// 해쉬 후
Console.WriteLine($"Original Func MD5: {md5}");
StringBuilder sb = new StringBuilder();
sb.Append("\"\"\"");
sb.Append(@".*");
sb.Append("\"\"\"");
if (Regex.Match(oldBuilder.ToString(), sb.ToString()).Success) {
string replace = Regex.Replace(oldBuilder.ToString(), sb.ToString(), "");
replace = Regex.Replace(replace, " ", "");
Console.WriteLine($"Builder: \n{replace}");
string md5 = MD5HashFunc(replace);
Console.WriteLine($"MD5: {md5}");
}
}
Console.WriteLine("-----------");
Console.ResetColor();
}
//Console.WriteLine(patch.Content);
}
Console.WriteLine($"Commit {commit.Sha} 추출 완료");
// Task.Delay(1000).Wait();
//break;
}
}
}
/// <summary>
/// Computes the MD5 digest of a string and returns it as upper-case hexadecimal.
/// </summary>
/// <param name="str">Text to hash; it is encoded with <see cref="Encoding.ASCII"/> before hashing.</param>
/// <returns>The digest as upper-case hexadecimal, two characters per byte.</returns>
public static string MD5HashFunc(string str) {
    // NOTE(review): ASCII encoding replaces non-ASCII characters, so inputs that differ
    // only in such characters produce the same hash. Kept to preserve existing hash values.
    byte[] byteArr = Encoding.ASCII.GetBytes(str);

    byte[] resultArr;
    // The hash algorithm is IDisposable; release it deterministically instead of leaking it.
    using (var md5 = MD5.Create()) {
        resultArr = md5.ComputeHash(byteArr);
    }

    StringBuilder md5Str = new StringBuilder(resultArr.Length * 2);
    foreach (byte b in resultArr) {
        md5Str.Append(b.ToString("X2"));
    }
    return md5Str.ToString();
}
/// <summary>
/// 디렉토리 삭제 함수
/// </summary>
/// <param name="targetDir"></param>
public static void DeleteDirectory(string targetDir) {
File.SetAttributes(targetDir, FileAttributes.Normal);
......@@ -171,6 +141,12 @@ namespace VulnCrawler
Directory.Delete(targetDir, false);
}
/// <summary>
/// Clone 콜백 함수
/// </summary>
/// <param name="progress"></param>
/// <returns></returns>
public static bool TransferProgress(TransferProgress progress) {
int totalBytes = progress.TotalObjects;
int receivedBytes = progress.ReceivedObjects;
......
......@@ -38,8 +38,13 @@
<Reference Include="LibGit2Sharp, Version=0.25.0.0, Culture=neutral, PublicKeyToken=7cbde695407f0333, processorArchitecture=MSIL">
<HintPath>..\packages\LibGit2Sharp.0.25.0\lib\netstandard2.0\LibGit2Sharp.dll</HintPath>
</Reference>
<Reference Include="MySql.Data, Version=8.0.10.0, Culture=neutral, PublicKeyToken=c5687fc88969c44d, processorArchitecture=MSIL" />
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.ValueTuple, Version=4.0.2.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
<HintPath>..\packages\System.ValueTuple.4.4.0\lib\net461\System.ValueTuple.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
......@@ -50,6 +55,7 @@
<ItemGroup>
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="VulnPython.cs" />
</ItemGroup>
<ItemGroup>
<None Include="App.config" />
......
using LibGit2Sharp;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace VulnCrawler
{
/// <summary>
/// Abstract base for crawlers that open a Git repository, select the commits whose
/// message matches <see cref="SearchKeyword"/>, and extract/hash the pre-patch version
/// of functions touched by those commits. Language-specific parsing is supplied by
/// derived classes.
/// </summary>
public abstract class VulnAbstractCrawler : IDisposable {
    private bool _disposed;

    /// <summary>
    /// Opens the repository at <paramref name="path"/> and searches its commits.
    /// </summary>
    /// <param name="path">Path passed to the <see cref="LibGit2Sharp.Repository"/> constructor.</param>
    public VulnAbstractCrawler(string path) {
        Repository = new Repository(path);
        try {
            // NOTE(review): SearchCommits is virtual and runs before any derived
            // constructor body; overrides must not rely on derived-class state.
            Commits = SearchCommits();
        } catch {
            // Do not leak the repository when the commit search fails.
            Repository.Dispose();
            throw;
        }
    }

    // No finalizer is declared: a finalizer must not touch other managed objects
    // (Repository could be null after a failed constructor, or already finalized).
    // NOTE(review): this relies on LibGit2Sharp releasing its own native handles when
    // Dispose() is never called - confirm.

    /// <summary>
    /// Releases the underlying repository.
    /// </summary>
    public void Dispose() {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases resources held by the crawler.
    /// </summary>
    /// <param name="disposing">True when called from <see cref="Dispose()"/>.</param>
    protected virtual void Dispose(bool disposing) {
        if (_disposed) {
            return;
        }
        if (disposing) {
            Repository?.Dispose();
        }
        _disposed = true;
    }

    // Names of the regex capture groups used by derived classes for hunk headers of the form
    // @@ -oldStart,oldLines +newStart,newLines @@ MethodName():
    public static string OldStart => "oldStart";
    public static string OldLines => "oldLines";
    public static string NewStart => "newStart";
    public static string NewLines => "newLines";
    public static string MethodName => "methodName";

    /// <summary>
    /// The opened repository.
    /// </summary>
    public Repository Repository { get; private set; }

    /// <summary>
    /// Commits returned by <see cref="SearchCommits"/> at construction time.
    /// </summary>
    public IEnumerable<Commit> Commits { get; private set; }

    /// <summary>
    /// Regular expression searched for in commit messages.
    /// </summary>
    protected string SearchKeyword => @"CVE-20\d\d-\d{4}";

    /// <summary>
    /// Regular expression used to locate functions in patch text.
    /// </summary>
    protected abstract string RegexFuncPattern { get; }

    /// <summary>
    /// File extension handled by the concrete crawler.
    /// </summary>
    protected abstract string Extension { get; }

    /// <summary>
    /// Selects the entries of <paramref name="patch"/> this crawler is interested in.
    /// </summary>
    /// <param name="patch">Patch produced by comparing two trees.</param>
    /// <returns>The selected patch entries.</returns>
    public abstract IEnumerable<PatchEntryChanges> GetPatchEntryChanges(Patch patch);

    /// <summary>
    /// Finds hunk headers of the form "@@ -a,b +c,d @@ MethodName" in patch text and
    /// returns the matches; groups are named by <see cref="OldStart"/>, <see cref="OldLines"/>,
    /// <see cref="NewStart"/>, <see cref="NewLines"/> and <see cref="MethodName"/>.
    /// </summary>
    /// <param name="patchCode">Patch text to search.</param>
    /// <returns>The collection of regex matches.</returns>
    public abstract MatchCollection GetMatches(string patchCode);

    /// <summary>
    /// Extracts the original (pre-patch) function text from a file stream.
    /// </summary>
    /// <param name="oldStream">Stream over the pre-patch file content.</param>
    /// <param name="methodName">Name of the function to locate.</param>
    /// <returns>The function text.</returns>
    protected abstract string GetOriginalFunc(Stream oldStream, string methodName);

    /// <summary>
    /// Produces the original function text together with its hash.
    /// </summary>
    /// <param name="oldStream">Stream over the pre-patch file content.</param>
    /// <param name="methodName">Name of the function to locate.</param>
    /// <returns>A tuple of the function text and its hash.</returns>
    public abstract (string originalFunc, string hash) GetPatchResult(Stream oldStream, string methodName);

    /// <summary>
    /// Removes comments from function text.
    /// </summary>
    /// <param name="original">Text to strip.</param>
    /// <returns>The stripped text.</returns>
    public abstract string RemoveComment(string original);

    /// <summary>
    /// Returns the commits whose message contains a match for <see cref="SearchKeyword"/>
    /// (case-insensitive).
    /// </summary>
    /// <returns>The matching commits, materialized into a list.</returns>
    public virtual IEnumerable<Commit> SearchCommits() {
        // The pattern is unanchored, so it matches anywhere in the message.
        var commits = Repository.Commits
            .Where(c => Regex.Match(c.Message, SearchKeyword, RegexOptions.IgnoreCase).Success)
            .ToList();
        return commits;
    }

    /// <summary>
    /// Computes the MD5 digest of a string and returns it as upper-case hexadecimal.
    /// </summary>
    /// <param name="str">Text to hash; it is encoded with <see cref="Encoding.ASCII"/> before hashing.</param>
    /// <returns>The digest as upper-case hexadecimal, two characters per byte.</returns>
    protected static string MD5HashFunc(string str) {
        // NOTE(review): ASCII encoding replaces non-ASCII characters, so inputs that differ
        // only in such characters produce the same hash. Kept to preserve existing hash values.
        byte[] byteArr = Encoding.ASCII.GetBytes(str);

        byte[] resultArr;
        // The hash algorithm is IDisposable; release it deterministically.
        using (var md5 = MD5.Create()) {
            resultArr = md5.ComputeHash(byteArr);
        }

        StringBuilder md5Str = new StringBuilder(resultArr.Length * 2);
        foreach (byte b in resultArr) {
            md5Str.Append(b.ToString("X2"));
        }
        return md5Str.ToString();
    }
}
/// <summary>
/// Crawler stub for C sources. Only <see cref="Extension"/> is implemented; every
/// other member throws <see cref="NotImplementedException"/>.
/// </summary>
public class VulnC : VulnAbstractCrawler
{
    /// <summary>
    /// Opens the repository at <paramref name="path"/> via the base constructor.
    /// </summary>
    /// <param name="path">Repository path forwarded to the base class.</param>
    public VulnC(string path) : base(path) {
    }

    /// <summary>File extension handled by this crawler.</summary>
    protected override string Extension => ".c";

    /// <summary>Not implemented.</summary>
    protected override string RegexFuncPattern => throw new NotImplementedException();

    /// <summary>Not implemented.</summary>
    public override MatchCollection GetMatches(string patchCode)
        => throw new NotImplementedException();

    /// <summary>Not implemented.</summary>
    public override IEnumerable<PatchEntryChanges> GetPatchEntryChanges(Patch patch)
        => throw new NotImplementedException();

    /// <summary>Not implemented.</summary>
    public override (string originalFunc, string hash) GetPatchResult(Stream oldStream, string methodName)
        => throw new NotImplementedException();

    /// <summary>Not implemented.</summary>
    public override string RemoveComment(string original)
        => throw new NotImplementedException();

    /// <summary>Not implemented.</summary>
    protected override string GetOriginalFunc(Stream oldStream, string methodName)
        => throw new NotImplementedException();
}
/// <summary>
/// Crawler for Python sources: selects ".py" patch entries, finds "def" names in hunk
/// headers, extracts the pre-patch function body and hashes it.
/// </summary>
public class VulnPython : VulnAbstractCrawler
{
    /// <summary>
    /// Opens the repository at <paramref name="path"/> via the base constructor.
    /// </summary>
    /// <param name="path">Repository path forwarded to the base class.</param>
    public VulnPython(string path) : base(path) {
    }

    /// <summary>File extension handled by this crawler.</summary>
    protected override string Extension => ".py";

    /// <summary>
    /// Matches a hunk header such as "@@ -290,8 +290,12 @@ def is_safe_url" and captures
    /// the four numbers and the function name into the named groups.
    /// </summary>
    protected override string RegexFuncPattern => $@"@@ \-(?<{OldStart}>\d+),(?<{OldLines}>\d+) \+(?<{NewStart}>\d+),(?<{NewLines}>\d+) @@ def (?<{MethodName}>\w+)";

    /// <summary>
    /// Returns every match of <see cref="RegexFuncPattern"/> in the patch text.
    /// </summary>
    /// <param name="patchCode">Patch text to search.</param>
    /// <returns>The collection of regex matches.</returns>
    public override MatchCollection GetMatches(string patchCode) {
        return Regex.Matches(patchCode, RegexFuncPattern);
    }

    /// <summary>
    /// Reads the stream line by line, finds the first "def methodName(" line and returns
    /// that line plus the function body that follows it. Blank lines and lines whose
    /// first non-blank character is '#' are omitted. The stream is closed when reading ends.
    /// </summary>
    /// <param name="oldStream">Stream over the pre-patch file content.</param>
    /// <param name="methodName">Name of the function to locate.</param>
    /// <returns>The function text, or an empty string when no definition is found.</returns>
    protected override string GetOriginalFunc(Stream oldStream, string methodName) {
        StringBuilder oldBuilder = new StringBuilder();
        // Only the opening parenthesis is required on the def line so that signatures
        // spanning several lines are found as well.
        Regex defPattern = new Regex($@"\bdef {Regex.Escape(methodName)}\s*\(");

        using (var reader = new StreamReader(oldStream)) {
            int defIndent = -1; // -1 until the def line has been seen
            string line;
            while ((line = reader.ReadLine()) != null) {
                if (defIndent < 0) {
                    if (defPattern.IsMatch(line)) {
                        defIndent = CountIndent(line);
                        oldBuilder.AppendLine(line);
                    }
                    continue;
                }

                // Blank lines carry no code and must not influence the hash.
                if (string.IsNullOrWhiteSpace(line)) {
                    continue;
                }

                // A non-blank line indented no deeper than the def line ends the function;
                // without this stop, bodies of later functions would be appended too.
                if (CountIndent(line) <= defIndent) {
                    break;
                }

                // Lines starting with '#' are comments.
                if (line.TrimStart().StartsWith("#", StringComparison.Ordinal)) {
                    continue;
                }

                oldBuilder.AppendLine(line);
            }
        }
        return oldBuilder.ToString();
    }

    // Counts leading whitespace characters; a tab and a space each count as one.
    private static int CountIndent(string line) {
        int count = 0;
        while (count < line.Length && char.IsWhiteSpace(line[count])) {
            count++;
        }
        return count;
    }

    /// <summary>
    /// Keeps the patch entries whose path ends with <see cref="Extension"/>.
    /// </summary>
    /// <param name="patch">Patch produced by comparing two trees.</param>
    /// <returns>The selected entries, materialized into a list.</returns>
    public override IEnumerable<PatchEntryChanges> GetPatchEntryChanges(Patch patch) {
        // Ordinal comparison: file extensions are not linguistic text.
        return patch.Where(e => e.Path.EndsWith(Extension, StringComparison.Ordinal)).ToList();
    }

    /// <summary>
    /// Removes every <see cref="Environment.NewLine"/> from the text and then deletes each
    /// triple-double-quoted string (docstring) it contains.
    /// </summary>
    /// <param name="original">Function text to strip.</param>
    /// <returns>The single-line text without triple-double-quoted strings.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="original"/> is null.</exception>
    public override string RemoveComment(string original) {
        if (original == null) {
            throw new ArgumentNullException(nameof(original));
        }

        string flattened = original.Replace(Environment.NewLine, "");
        // Lazy quantifier: each docstring is removed on its own. A greedy match would
        // also delete all code between the first and the last triple-quoted string.
        return Regex.Replace(flattened, "\"\"\".*?\"\"\"", "");
    }

    /// <summary>
    /// Extracts the named function from the stream, strips it with
    /// <see cref="RemoveComment"/>, prints it to the console and hashes it with MD5.
    /// </summary>
    /// <param name="stream">Stream over the pre-patch file content.</param>
    /// <param name="methodName">Name of the function to locate.</param>
    /// <returns>A tuple of the stripped function text and its MD5 hash.</returns>
    public override (string originalFunc, string hash) GetPatchResult(Stream stream, string methodName) {
        // Pre-patch function text
        string func = GetOriginalFunc(stream, methodName);
        // Strip newlines and docstrings
        func = RemoveComment(func);
        Console.WriteLine(func);
        // Hash the normalized text
        string md5 = MD5HashFunc(func);
        return (func, md5);
    }
}
}
......@@ -2,4 +2,5 @@
<packages>
<package id="LibGit2Sharp" version="0.25.0" targetFramework="net461" />
<package id="LibGit2Sharp.NativeBinaries" version="1.0.210" targetFramework="net461" />
<package id="System.ValueTuple" version="4.4.0" targetFramework="net461" />
</packages>
\ No newline at end of file
......