diff --git a/cmd/main.go b/cmd/main.go index b06b458..600c0ba 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -2,6 +2,7 @@ package main //TODO add support for ipv6, netflow5 and IPFX import ( + "encoding/csv" "encoding/json" "flag" "fmt" @@ -9,6 +10,7 @@ import ( "log" "net/url" "os" + "sort" "stanislav" "strings" "time" @@ -140,19 +142,20 @@ func flagConfig() { func main() { flagConfig() + stanislav.Conf = &config + run2() + datasetFlowAnalysis() + return + if stanislav.FlowPath != "" { stanislav.OfflineMode() } else { - stanislav.Conf = &config //TODO handle nil stanislav.LiveMode() } gatherCaptureEndingPeriodicity() //Used to fill last possible periodic counter - - ThreatStats() - FlowStats() - dumpToFile() + datasetFlowAnalysis() } func ThreatStats() { @@ -163,26 +166,26 @@ func ThreatStats() { func FlowStats() { fmt.Println("\nPeriodic flows") - json, err := stanislav.PeriodiFlows.Marshal() + stats, err := stanislav.PeriodiFlows.Marshal() if err != nil { return } - fmt.Printf("%s", string(json)) + fmt.Printf("%s", string(stats)) } -func commonFlows() []string{ +func commonFlows(seenAtLeastX int) []string { //matching threat in coming with periodic stuff var commonThreat []string commonThreat = make([]string, 0, 100) - for k,v := range stanislav.PeriodiFlows { - k2 := strings.Split(k,"/") - k3 := k2[0] + "/"+k2[1] + for k, v := range stanislav.PeriodiFlows { + k2 := strings.Split(k, "/") + k3 := k2[0] + "/" + k2[1] if _, ok := stanislav.PossibleThreat[k3]; ok { commonThreat = append(commonThreat, k3) } else if _, ok := stanislav.PossibleThreat[k2[1]+"/"+k2[0]]; ok { commonThreat = append(commonThreat, k3) } else { - if v.PeriodicityCounter > 1 { + if v.PeriodicityCounter >= seenAtLeastX { commonThreat = append(commonThreat, k3) } } @@ -191,7 +194,7 @@ func commonFlows() []string{ return commonThreat } -func gatherCaptureEndingPeriodicity(){ +func gatherCaptureEndingPeriodicity() { //gathering all possible new periodicity, because we don't update on every entry for k, v := range stanislav.PeriodiFlows { if val, ok := stanislav.PossibleThreat[k]; ok { @@ -204,17 +207,226 @@ func gatherCaptureEndingPeriodicity(){ } } -func dumpToFile(){ +func dumpToFile() { currTime := time.Now().Format(time.RFC3339) - dumpPath := "./dump/" + currTime - if _, err := os.Stat("./dump"); os.IsNotExist(err) { - os.Mkdir("./dump", os.ModePerm) + dumpPath := "./dump/" + if _, err := os.Stat(dumpPath); os.IsNotExist(err) { + os.Mkdir(dumpPath, os.ModePerm) } + dumpPath += fmt.Sprintf("%.2f/", stanislav.Tolerance) + if _, err := os.Stat(dumpPath); os.IsNotExist(err) { + os.Mkdir(dumpPath, os.ModePerm) + } + dumpPath += currTime if _, err := os.Stat(dumpPath); os.IsNotExist(err) { os.Mkdir(dumpPath, os.ModePerm) } stanislav.WriteObjToJSONFile(dumpPath+"/periodicity_report.json", stanislav.PeriodiFlows) //TODO change this like peng that every X sec dump stanislav.WriteObjToJSONFile(dumpPath+"/threat_report.json", stanislav.PossibleThreat) - stanislav.WriteObjToJSONFile(dumpPath+"/highly_threat.json", commonFlows()) -} \ No newline at end of file + stanislav.WriteObjToJSONFile(dumpPath+"/highly_threat.json", commonFlows(1)) + dumpCsvPeriodicRecord(dumpPath + "/periodic.csv") + dumpCsvNotPeriodicRecord(dumpPath + "/not_periodic.csv") + dumpPeriodicFlowKey(dumpPath + "/periodic_keys.json") + dumpAllKeyFlow(dumpPath + "/allFlows.json") +} + +type analysis struct { + PeriodicTolerance float64 + MaliciousIpFound int + MaliciousIpTotal int + Precision float64 + Recall float64 + Accuracy float64 + TotalIpFound int + FalseNegatives int + FalsePositives int + TrueNegatives int + TruePositives int +} + +func datasetFlowAnalysis() { + //analyze only periodic flows + fmt.Println("minPeriodicity,tolerance,badip,precision,recall,accuracy,totalIp") + + for i := 0; i <= 4; i++ { //end of tn (true negative) + for j := 6; j <= 10; j++ { //end of fn (false negative) + a := periodicityFlowAnalysis(i, 5, j) //tn, fp, fn + if a.TotalIpFound != 0 { + fmt.Printf("[tn:%d,fn:%d] ", i, j) + fmt.Printf("tp: %d | tn: %d | fp: %d | fn: %d | precision: %.2f | recall: %.2f | accuracy: %.2f\n", + a.FalsePositives, a.TrueNegatives, a.FalsePositives, a.FalseNegatives, a.Precision, a.Recall, a.Accuracy) + } + } + } +} + +func (a analysis) Stats(msg string) { + //fmt.Println(msg) + fmt.Printf("tolerance: %.2f | bad ip: %d/%d | precision: %.2f | recall: %.2f | accuracy: %.2f\n", a.PeriodicTolerance, a.MaliciousIpFound, a.MaliciousIpTotal, a.Precision, a.Recall, a.Accuracy) + //fmt.Printf("%.2f,%d/%d,%.2f,%.2f,%.2f,%d", a.PeriodicTolerance, a.MaliciousIpFound, a.MaliciousIpTotal, a.Precision, a.Recall, a.Accuracy, a.TotalIpFound) +} + +//TODO move to specific script, this functions are used to create te dataset +func dumpCsvPeriodicRecord(fname string) { + file, err := os.OpenFile(fname, os.O_CREATE|os.O_RDWR, os.ModePerm) + defer file.Close() + + if err != nil { + log.Println(err) + return + } + + w := csv.NewWriter(file) + w.Comma = '|' + + //Make chronological order + keys := make([]string, 0, len(stanislav.ChronologicalOrderCsvFlows)) + for k := range stanislav.ChronologicalOrderCsvFlows { + keys = append(keys, k) + } + sort.Strings(keys) + + cronoFlows := make([][]string, 0, len(keys)) + for _, k := range keys { + for _, v := range stanislav.ChronologicalOrderCsvFlows[k] { + cronoFlows = append(cronoFlows, v) + } + } + + err = w.WriteAll(cronoFlows) + if err != nil { + log.Println(err) + } +} + +func dumpCsvNotPeriodicRecord(fname string) { + file, err := os.OpenFile(fname, os.O_CREATE|os.O_RDWR, os.ModePerm) + defer file.Close() + + if err != nil { + log.Println(err) + return + } + + w := csv.NewWriter(file) + w.Comma = '|' + + targetNumber := 100 + cronoFlows := make([][]string, 0, targetNumber) + + for k, v := range stanislav.AnalysisCsvFlow { + if _, ok := stanislav.PeriodicCsvFLows[k]; !ok { + if targetNumber-len(v) >= 0 { + cronoFlows = append(cronoFlows, v...) + targetNumber -= len(v) + } + } + if targetNumber == 0 { + break + } + } + + err = w.WriteAll(cronoFlows) + if err != nil { + log.Println(err) + } +} + +func dumpPeriodicFlowKey(fname string) { + //Make chronological order + keys := make([]string, 0, len(stanislav.PeriodiFlows)) + for k := range stanislav.PeriodiFlows { + keys = append(keys, k) + } + + stanislav.WriteObjToJSONFile(fname, keys) +} + +func dumpAllKeyFlow(fname string) { + keys := make([]string, 0, len(stanislav.AnalysisCsvFlow)) + for k := range stanislav.AnalysisCsvFlow { + keys = append(keys, k) + } + + stanislav.WriteObjToJSONFile(fname, keys) +} + +func periodicityFlowAnalysis(tn, fp, fn int) analysis { + trueNegative, truePositive := 0, 0 + falsePositive, falseNegative := 0, 0 + + for _, v := range flowSeen { + if v <= tn { //2 + trueNegative++ + } else if v <= fp { //5 + falsePositive++ + } else if v >= 6 && v <= fn { + falseNegative++ + } else { + truePositive++ + } + } + + precision := float64(truePositive) / (float64(truePositive) + float64(falsePositive)) + recall := float64(truePositive) / (float64(truePositive) + float64(falseNegative)) + + return analysis{ + Precision: precision, + Recall: recall, + Accuracy: (precision + recall) / 2, + TotalIpFound: len(flowSeen), + TruePositives: truePositive, + TrueNegatives: trueNegative, + FalseNegatives: falseNegative, + FalsePositives: falsePositive, + } +} + +var ( + flowSeen = make(map[string]int) +) + +func run2() { + path := "/media/ale/DatiD/Progetti/Progetti2019/GoPrj/stanislav/internals/dump" + dirs := stanislav.WalkAllDirs(path) + calculateFlowSeen(dirs) +} + +func calculateFlowSeen(folderPath []string) { + f, e := os.OpenFile("/media/ale/DatiD/Progetti/Progetti2019/GoPrj/stanislav/internals/allFlows.json", os.O_RDONLY, 0777) + if e != nil { + log.Fatal(e) + } + + var keys []string + r := json.NewDecoder(f) + if err := r.Decode(&keys); err != nil { + log.Fatal(err) + } + + for _, key := range keys { + flowSeen[key] = 0 + } + + for _, fpath := range folderPath { + file, err := os.OpenFile(fpath, os.O_RDONLY, 0777) + if err != nil { + log.Fatal(err) + } + + var keys []string + r := json.NewDecoder(file) + if err := r.Decode(&keys); err != nil { + log.Fatal(err) + } + + for _, key := range keys { + if _, ok := flowSeen[key]; ok { + flowSeen[key]++ + } else { + flowSeen[key] = 1 + } + } + } +} diff --git a/logic.go b/logic.go index fd827ce..5ee25f3 100644 --- a/logic.go +++ b/logic.go @@ -20,7 +20,6 @@ func (f *PeriodicFlows) Marshal() ([]byte, error) { type PeriodicFlows map[string]*FlowInfo type AllFlows map[string]*FlowInfo - type FlowInfo struct { TWDuration float64 `json:"frequency"` ServerPort uint16 `json:"server_port"` @@ -57,10 +56,11 @@ func SetTwDuration(fi *FlowInfo, rf RawFlow) { fi.LastSwitched = rf.LastSwitched } -func InspectFlow(rf RawFlow) { +//TODO remove bool return, used for analysis only +func InspectFlow(rf RawFlow) bool { if !(rf.Ipv4DstAddr != "" && rf.Ipv4SrcAddr != "0.0.0.0" && rf.Ipv4DstAddr != "0.0.0.0" && !ExcludeMultiAndBroadcast(rf.Ipv4SrcAddr) && !ExcludeMultiAndBroadcast(rf.Ipv4DstAddr)) { - return + return false } //TODO create a function that handles C2 server blocklist @@ -75,15 +75,14 @@ func InspectFlow(rf RawFlow) { //https://tools.ietf.org/html/rfc5102#section-5 if rf.EndReason == 2 { //TODO check and remove if in the map - return + return false } //https://tools.ietf.org/html/rfc5103 if rf.BiFlowDirection == 2 { //TODO check and remove if in the map - return + return false } - var key string - key = fmt.Sprintf("%s/%s/%d", rf.Ipv4SrcAddr, rf.Ipv4DstAddr, rf.PortDst) + key := fmt.Sprintf("%s/%s/%d", rf.Ipv4SrcAddr, rf.Ipv4DstAddr, rf.PortDst) if flowInfo, flowSeen := analisi[key]; flowSeen { if flowInfo.TimeWindowsExpiresAt.IsZero() { //compute new TimeWindow @@ -102,6 +101,7 @@ func InspectFlow(rf RawFlow) { if flowInfo.PeriodicityCounter >= SeenXtime { PeriodiFlows[key] = flowInfo ChangePeriodicStatus(key, flowInfo, true) + return true } } else { if flowInfo.PeriodicityCounter >= SeenXtime { @@ -119,6 +119,8 @@ func InspectFlow(rf RawFlow) { LastSwitched: rf.LastSwitched, } } + + return false } func ResetCurrentTW(key string, fi *FlowInfo, lastSwitched time.Time) { @@ -132,11 +134,15 @@ func ChangePeriodicStatus(key string, fi *FlowInfo, v bool) { return } - if v && !fi.CurrentlyPeriodic { + if v && !fi.CurrentlyPeriodic { //TODO re-enable for live mode! //AddPossibleThreat(key, fmt.Sprintf("periodic frequency: %.2fs seen %d times.", fi.TWDuration, fi.PeriodicityCounter)) - logger.Printf("%s \tbecame periodic! Seen %d times. Frequency: %.2fs ", key, fi.PeriodicityCounter, fi.TWDuration) + if Conf.Verbose > 0 { + logger.Printf("%s \tbecame periodic! Seen %d times. Frequency: %.2fs ", key, fi.PeriodicityCounter, fi.TWDuration) + } } else { - logger.Printf("%s \tnot periodic anymore! Seen %d times. Frequency: %.2fs ", key, fi.PeriodicityCounter, fi.TWDuration) + if Conf.Verbose > 0 { + logger.Printf("%s \tnot periodic anymore! Seen %d times. Frequency: %.2fs ", key, fi.PeriodicityCounter, fi.TWDuration) + } } fi.CurrentlyPeriodic = v diff --git a/offline_mode.go b/offline_mode.go index 8106ba8..a0c67a2 100644 --- a/offline_mode.go +++ b/offline_mode.go @@ -2,9 +2,17 @@ package stanislav import ( "encoding/csv" + "fmt" "log" "os" "strconv" + "strings" +) + +var ( + AnalysisCsvFlow = make(map[string][][]string) + PeriodicCsvFLows = make(map[string][][]string) + ChronologicalOrderCsvFlows = make(map[string][][]string) //they key is actually the epoch ) func OfflineMode() { @@ -14,7 +22,7 @@ func OfflineMode() { ReadFlowFiles(dirs, InspectFlow) } -func ReadFlowFiles(dirs []string, inspect func(v RawFlow)) { +func ReadFlowFiles(dirs []string, inspect func(v RawFlow) bool) { for _, fPath := range dirs { file, err := os.OpenFile(fPath, os.O_RDONLY, 0777) if err != nil { @@ -59,47 +67,98 @@ func ReadFlowFiles(dirs []string, inspect func(v RawFlow)) { end := 6 switch csvField[18] { - case "reserved": end = 0 - case "idle_timeout": end = 1 - case "active_timeout": end = 2 - case "end_of_flow_detected": end = 3 - case "forced_end": end = 4 - case "lack_of_resources": end = 5 - case "unassigned": end = 6 - default: continue + case "reserved": + end = 0 + case "idle_timeout": + end = 1 + case "active_timeout": + end = 2 + case "end_of_flow_detected": + end = 3 + case "forced_end": + end = 4 + case "lack_of_resources": + end = 5 + case "unassigned": + end = 6 + default: + continue } - - riskName :=csvField[21] + riskName := csvField[21] riskRaw, err := strconv.Atoi(csvField[22]) - if err != nil{ + if err != nil { continue } rawFlow := RawFlow{ - Ipv4SrcAddr: Ipv4SrcAddr, - Ipv4DstAddr: Ipv4DstAddr, - FirstSwitched: FirstSwitched, - PortDst: uint16(PortDst), - PortSrc: uint16(PortSrc), - InPkts: uint32(InPkts), - InBytes: uint32(InBytes), - LastSwitched: LastSwitched, - EndReason: uint8(end), + Ipv4SrcAddr: Ipv4SrcAddr, + Ipv4DstAddr: Ipv4DstAddr, + FirstSwitched: FirstSwitched, + PortDst: uint16(PortDst), + PortSrc: uint16(PortSrc), + InPkts: uint32(InPkts), + InBytes: uint32(InBytes), + LastSwitched: LastSwitched, + EndReason: uint8(end), BiFlowDirection: uint(biflow), } - InspectFlow(rawFlow) + key := fmt.Sprintf("%s/%s/%d", rawFlow.Ipv4SrcAddr, rawFlow.Ipv4DstAddr, rawFlow.PortDst) + isPeriodic := inspect(rawFlow) + + if val, ok := AnalysisCsvFlow[key]; ok { + val = append(val, csvField) + AnalysisCsvFlow[key] = val + } else { + AnalysisCsvFlow[key] = [][]string{csvField} + } + + if isPeriodic { + if val, ok := AnalysisCsvFlow[key]; ok { + if pFlows, pfOk := PeriodicCsvFLows[key]; pfOk { + lastPeriodic := strings.Join(pFlows[len(pFlows)-1], "|") + elm := [][]string{val[len(val)-2], val[len(val)-1]} + + if strings.Compare(strings.Join(elm[0], "|"), lastPeriodic) == 0 { + //put only last elm + pFlows = append(pFlows, elm[1]) + PeriodicCsvFLows[key] = pFlows + addChronologicalFlow(elm[1]) + } else { // put last 2 elem + pFlows = append(pFlows, elm...) + PeriodicCsvFLows[key] = pFlows + addChronologicalFlow(elm...) + } + } else { //case that i've seen first periodic flow + elm := [][]string{val[len(val)-3], val[len(val)-2], val[len(val)-1]} + PeriodicCsvFLows[key] = elm + addChronologicalFlow(elm...) + } + } + } if !(Ipv4DstAddr != "" && Ipv4SrcAddr != "0.0.0.0" && Ipv4DstAddr != "0.0.0.0" && !ExcludeMultiAndBroadcast(Ipv4SrcAddr) && !ExcludeMultiAndBroadcast(Ipv4DstAddr)) { continue } - risk := (riskRaw) - if risk > 0 && risk != 9 && risk != 8 && risk != 7 && risk != 128 && risk != 384 && risk!=640 &&riskName != "" { - AddPossibleThreat(Ipv4SrcAddr + "/" + Ipv4DstAddr, riskName) + risk := riskRaw + if risk > 0 && risk != 9 && risk != 8 && risk != 7 && risk != 128 && risk != 384 && risk != 640 && riskName != "" { + AddPossibleThreat(Ipv4SrcAddr+"/"+Ipv4DstAddr, riskName) } } file.Close() } } + +func addChronologicalFlow(v ...[]string) { + for _, record := range v { + firstSwitched := record[7] + if flow, ok := ChronologicalOrderCsvFlows[firstSwitched]; ok { + flow = append(flow, record) + ChronologicalOrderCsvFlows[firstSwitched] = flow + } else { + ChronologicalOrderCsvFlows[firstSwitched] = [][]string{record} + } + } +} diff --git a/pkg/dga/lms_test.go b/pkg/dga/lms_test.go index 8409783..401f2c2 100644 --- a/pkg/dga/lms_test.go +++ b/pkg/dga/lms_test.go @@ -15,7 +15,7 @@ func TestLmsScore(t *testing.T) { args args want float64 }{ - // TODO: Add test cases. + // TODO: Add stanislav-dataset cases. { name: "test1", args: args{subject: "/quDJa5xQ8bf9um/nKl3/rRPiY6OpgXFX/Ns2bkVRfNXr0/MRh2tGEOHDpyEnsgKE/"}, @@ -30,4 +30,3 @@ func TestLmsScore(t *testing.T) { }) } } -