diff --git a/Makefile.am b/Makefile.am index 27077d05..4495304b 100755 --- a/Makefile.am +++ b/Makefile.am @@ -1,6 +1,6 @@ ACLOCAL_AMFLAGS = -I m4 -SUBDIRS = src/lib src/output src/netflow src/collector src/decode src/maxmind src/nfdump src/nfcapd +SUBDIRS = src/lib src/output src/netflow src/collector src/decode src/maxmind src/tor src/nfdump src/nfcapd SUBDIRS += src/nfanon src/nfexpire src/nfreplay . src src/test src/nfreader src/inline src/include if SFLOW diff --git a/configure.ac b/configure.ac index 0d0ed808..eb46f9fa 100644 --- a/configure.ac +++ b/configure.ac @@ -168,11 +168,23 @@ build_ftconv="yes" , build_ftconv="no" AM_CONDITIONAL([FT2NFDUMP], [test "x$build_ftconv" = "xyes"]) AC_ARG_ENABLE(maxmind, -[ --enable-maxmind Build geolookup for MaxMind GeoDB; default is NO]) +[ --enable-maxmind Build geolookup for MaxMind GeoDB; default is NO], +[build_maxmind="$enableval"], +[build_maxmind="no"]) +dnl action-if-given also runs for --disable-maxmind and --enable-maxmind=no, +dnl so normalise on the value given instead of assuming yes. +AS_IF([test "x$build_maxmind" != "xyes"], [build_maxmind="no"]) +AM_CONDITIONAL([MAXMIND], [test "x$build_maxmind" = "xyes"]) -AS_IF([test "x$enable_maxmind" = xyes], -build_maxmind="$enable_maxmind", build_maxmind="no") -AM_CONDITIONAL([MAXMIND], [test "x$build_maxmind" = "xyes"]) +AC_ARG_ENABLE(tor, +[ --enable-tor Build torlookup for tor onion lookups; default is NO], +[build_tor="$enableval"], +[build_tor="no"]) +dnl action-if-given also runs for --disable-tor and --enable-tor=no, +dnl so normalise on the value given instead of assuming yes. +AS_IF([test "x$build_tor" != "xyes"], [build_tor="no"]) +AM_CONDITIONAL([TORLOOKUP], [test "x$build_tor" = "xyes"]) +AS_IF([test "x$build_tor" = "xyes"], [CFLAGS="$CFLAGS -DBUILDTOR"]) AC_ARG_ENABLE(ja4, [ --enable-ja4 Build with ja4 fingerprinting code; May require a license; default is NO]) @@ -654,23 +666,23 @@ echo "" echo "----------------------------------" echo " Build Settings for ${PACKAGE_TARNAME} v${PACKAGE_VERSION}" echo "----------------------------------" -echo " host type = $host_os" -echo " install dir = $prefix" -echo " CC = $CC" -echo " CFLAGS = $AM_CFLAGS $CFLAGS" -echo " CPPFLAGS = $AM_CPPFLAGS $CPPFLAGS" -echo " LDFLAGS = $AM_LDFLAGS $LDFLAGS" -echo " LIBS = $LIBS" -echo " Enable liblz4 = $use_lz4" -echo " Enable libbz2 = $use_bzip2" -echo " Enable libzstd = $use_zstd" -echo " Enable ja4 = 
$build_ja4" -echo " Build geolookup = $build_maxmind" -echo " Build sflow = $build_sflow" -echo " Build nfpcapd = $build_nfpcapd" -echo " Build flowtools conv = $build_ftconv" -echo " Build nfprofile = $build_nfprofile" -echo " Build ft2nfdump = $build_ftconv" +echo " host type = $host_os" +echo " install dir = $prefix" +echo " CC = $CC" +echo " CFLAGS = $AM_CFLAGS $CFLAGS" +echo " CPPFLAGS = $AM_CPPFLAGS $CPPFLAGS" +echo " LDFLAGS = $AM_LDFLAGS $LDFLAGS" +echo " LIBS = $LIBS" +echo " Enable liblz4 = $use_lz4" +echo " Enable libbz2 = $use_bzip2" +echo " Enable libzstd = $use_zstd" +echo " Enable ja4 = $build_ja4" +echo " Build geolookup = $build_maxmind" +echo " Build torlookup = $build_tor" +echo " Build sflow = $build_sflow" +echo " Build nfpcapd = $build_nfpcapd" +echo " Build nfprofile = $build_nfprofile" +echo " Build ft2nfdump = $build_ftconv" echo "----------------------------------" echo "" echo " You can run ./make now." diff --git a/man/geolookup.1 b/man/geolookup.1 index bb8614b8..0656ecf3 100755 --- a/man/geolookup.1 +++ b/man/geolookup.1 @@ -1,4 +1,4 @@ -.\" Copyright (c) 2022, Peter Haag +.\" Copyright (c) 2024, Peter Haag .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without @@ -33,28 +33,28 @@ .Nd geo location lookup for IP addresses .Sh SYNOPSIS .Nm -.Op Fl G Ar geofile +.Op Fl G Ar geoDBfile .Ar iplist .Nm .Fl d Ar directory -.Fl w Ar geofile +.Fl w Ar geoDBfile .Sh DESCRIPTION .Nm is a tool to lookup AS and geo location information of one or more IP addresses. You need -to create a lookup database first, in order to use +to create the nfdump specific lookup database first, before using .Nm .Pp .Nm is also used to create the nfdump formatted lookup database file from the maxmind csv files. You need to have a maxmind account at https://maxmind.com, in order to download the relevant -csv file. See below for building instructions. +csv file. See the section below for the building instructions. 
.Pp .Nm accepts a list of IP addresses either on the command line, separated by spaces or on .Ar stdin line by line. The IP address on each line can be embedded in a string separated be -spaces on the left and right, therefore it can read the piped output from another tool. +spaces on the left and right, therefore it can read the piped output from other tools. .Pp The options are as follows: .Bl -tag -width Ds @@ -64,10 +64,10 @@ Use the csv files in to build the binary lookup database file. With this argument .Nm creates a new binary lookup database file. -.It Fl w Ar geofile +.It Fl w Ar geoDBfile Name of the new lookup database file. -.It Fl G Ar geofile -Use the binary geofile as lookup database for the current AS and location lookups. +.It Fl G Ar geoDBfile +Use the binary geoDBfile as lookup database for the current AS and location lookups. .El .Pp To specify the geo lookup database diff --git a/man/nfdump.1 b/man/nfdump.1 index 97defd35..3935cd44 100755 --- a/man/nfdump.1 +++ b/man/nfdump.1 @@ -1,4 +1,4 @@ -\" Copyright (c) 2023, Peter Haag +.\" Copyright (c) 2024, Peter Haag .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without @@ -49,6 +49,7 @@ .Op Fl I .Op Fl D Ar nameserver .Op Fl G Ar geoDB +.Op Fl H Ar torDB .Op Fl s Ar statistic .Op Fl n Ar num .Op Fl o Ar format @@ -370,12 +371,42 @@ for the path of The option .Fl G overwrites -.Ar NFGEODB. +.Ar NFGEODB +or +.Ar geodb.path +in +.Ar nfdump.conf. In order to prevent reading any .Ar geoDB file, even if it would exist set .Fl G .Sy none. +See also +.Ar geolookup(1) +.It Fl H Ar torDB +Use +.Ar torDB +as tor lookup DB for tor exit node lookups. +.Nm +tries to read the environment variable +.Ar NFTORDB +for the path of +.Ar torDB. +The option +.Fl H +overwrites +.Ar NFTORDB +or +.Ar tordb.path +in +.Ar nfdump.conf. +In order to prevent reading any +.Ar torDB +file, even if it would exist set +.Fl H +.Sy none. 
+See also +.Ar torlookup(1) .It Fl s Ar statistic Op Ar :p Op Ar /orderby Generate the Top N flow record or flow element statistic. By optionally adding .Sy :p @@ -940,15 +971,15 @@ or .Cm dst the source or destination port may match. .Pp -.It Cm icmp-type Ar num -.It Cm icmp-code Ar num +.It Cm icmp type Ar num +.It Cm icmp code Ar num True if the respective icmp field of the record matches .Ar num. This automatically implies .Cm proto icmp. .Pp -.It Cm engine-type Ar num -.It Cm engine-id Ar num +.It Cm engine type Ar num +.It Cm engine id Ar num .It Cm sysid Ar num True if the respective fields of the record matches .Ar num @@ -1688,6 +1719,10 @@ dst IP geo location info src AS organisation name .It Cm %dasn dst AS organisation name +.It Cm %stor +src IP 2 letter tor exit info: TX tor exit node +.It Cm %dtor +dst IP 2 letter tor exit info: TX tor exit node .It Cm %n new line char \\n .It Cm %ipl diff --git a/src/lib/nffile.c b/src/lib/nffile.c index 1b3998cc..ac0c4b95 100644 --- a/src/lib/nffile.c +++ b/src/lib/nffile.c @@ -89,7 +89,7 @@ (a)->flags = 0; \ (a)->type = DATA_BLOCK_TYPE_3; -static const char *nf_creator[MAX_CREATOR] = {"unknown", "nfcapd", "nfpcapd", "sfcapd", "nfdump", "nfanon", "nfprofile", "geolookup", "ft2nfdump"}; +static const char *nf_creator[MAX_CREATOR] = {"unknown", "nfcapd", "nfpcapd", "sfcapd", "nfdump", "nfanon", "nfprofile", "geolookup", "ft2nfdump", "torlookup"}; static unsigned NumWorkers = DEFAULTWORKERS; diff --git a/src/lib/nffileV2.h b/src/lib/nffileV2.h index 31cd178c..8eba33a9 100755 --- a/src/lib/nffileV2.h +++ b/src/lib/nffileV2.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, Peter Haag + * Copyright (c) 2024, Peter Haag * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -87,7 +87,8 @@ typedef struct fileHeaderV2_s { #define CREATOR_NFPROFILE 6 #define CREATOR_LOOKUP 7 #define CREATOR_FT2NFDUMP 8 -#define MAX_CREATOR 9 +#define CREATOR_TORLOOKUP 9 +#define MAX_CREATOR 10 off_t offAppendix; // offset in file for appendix blocks with additional data uint32_t BlockSize; // max block size of data blocks diff --git a/src/nfdump/Makefile.am b/src/nfdump/Makefile.am index 1a69919b..b85e336a 100755 --- a/src/nfdump/Makefile.am +++ b/src/nfdump/Makefile.am @@ -1,7 +1,7 @@ bin_PROGRAMS = nfdump -AM_CPPFLAGS = -I.. -Icompat_1_6_x -I../include -I../lib -I../output -I../maxmind -I../netflow -I../collector -I../lib/conf -I../lib/filter -I../decode -I../inline $(DEPS_CFLAGS) +AM_CPPFLAGS = -I.. -Icompat_1_6_x -I../include -I../lib -I../output -I../maxmind -I../tor -I../netflow -I../collector -I../lib/conf -I../lib/filter -I../decode -I../inline $(DEPS_CFLAGS) AM_LDFLAGS = -L../lib EXTRA_DIST = nffile_compat.c memhandle.c heapsort_inline.c @@ -19,6 +19,6 @@ compat = compat_1_6_x/nfx.h compat_1_6_x/nfx.c nfdump_SOURCES = nfdump.c spin_lock.h \ $(exporter) $(nbar) $(ifvrf) $(nfstat) $(nflowcache) $(nfprof) $(sort) $(compat) -nfdump_LDADD = ../output/liboutput.a ../lib/libnfdump.la ../maxmind/libmaxmind.a ../decode/libnfdecode.a +nfdump_LDADD = ../output/liboutput.a ../lib/libnfdump.la ../maxmind/libmaxmind.a ../tor/libtorlookup.a ../decode/libnfdecode.a CLEANFILES = *.gch diff --git a/src/nfdump/nfdump.c b/src/nfdump/nfdump.c index f669efbc..9da77473 100644 --- a/src/nfdump/nfdump.c +++ b/src/nfdump/nfdump.c @@ -69,6 +69,7 @@ #include "nfx.h" #include "nfxV3.h" #include "output.h" +#include "tor.h" #include "util.h" #include "version.h" @@ -119,6 +120,7 @@ static void usage(char *name) { "-c\t\tLimit number of matching records\n" "-D \tUse nameserver for host lookup.\n" "-G \tUse this nfdump geoDB to lookup country/location.\n" + "-H \tUse nfdump torDB to lookup tor info.\n" 
"-N\t\tPrint plain numbers\n" "-s [/]\tGenerate statistics for any valid record element.\n" "\t\tand ordered by : packets, bytes, flows, bps pps and bpp.\n" @@ -180,8 +182,8 @@ static void PrintSummary(stat_record_t *stat_record, outputParams_t *outputParam duration = 0; } if (duration > 0 && stat_record->lastseen > 0) { - bps = (stat_record->numbytes << 3) / duration; // bits per second. ( >> 3 ) -> * 8 to convert octets into bits - pps = stat_record->numpackets / duration; // packets per second + bps = (stat_record->numbytes << 3) / duration; // bits per second. ( >> 3 ) -> * 8 to convert octets into bits + pps = stat_record->numpackets / duration; // packets per second bpp = stat_record->numpackets ? stat_record->numbytes / stat_record->numpackets : 0; // Bytes per Packet } if (outputParams->mode == MODE_CSV) { @@ -518,7 +520,7 @@ int main(int argc, char **argv) { nfprof_t profile_data; char *wfile, *ffile, *filter, *tstring, *stat_type; char *print_format; - char *print_order, *query_file, *geo_file, *configFile, *nameserver, *aggr_fmt; + char *print_order, *query_file, *configFile, *nameserver, *aggr_fmt; int ffd, element_stat, fdump; int flow_stat, aggregate, aggregate_mask, bidir; int print_stat, gnuplot_stat, syntax_only, compress, worker; @@ -555,7 +557,8 @@ int main(int argc, char **argv) { aggr_fmt = NULL; configFile = NULL; - geo_file = getenv("NFGEODB"); + char *geo_file = getenv("NFGEODB"); + char *tor_file = getenv("NFTORDB"); outputParams = (outputParams_t *)calloc(1, sizeof(outputParams_t)); if (!outputParams) { @@ -566,7 +569,7 @@ int main(int argc, char **argv) { Ident[0] = '\0'; int c; - while ((c = getopt(argc, argv, "6aA:Bbc:C:D:E:G:s:ghn:i:jf:qyz::r:v:w:J:M:NImO:R:XZt:TVv:W:x:o:")) != EOF) { + while ((c = getopt(argc, argv, "6aA:Bbc:C:D:E:G:s:gH:hn:i:jf:qyz::r:v:w:J:M:NImO:R:XZt:TVv:W:x:o:")) != EOF) { switch (c) { case 'h': usage(argv[0]); @@ -632,6 +635,12 @@ int main(int argc, char **argv) { if (strcmp(optarg, "none") != 0 && 
!CheckPath(optarg, S_IFREG)) exit(EXIT_FAILURE); geo_file = strdup(optarg); break; + case 'H': + CheckArgLen(optarg, MAXPATHLEN); + if (strcmp(optarg, "none") != 0 && !CheckPath(optarg, S_IFREG)) exit(EXIT_FAILURE); + tor_file = strdup(optarg); + // outputParams->doTag = 1; + break; case 'X': fdump = 1; break; @@ -957,6 +966,19 @@ int main(int argc, char **argv) { outputParams->hasGeoDB = true; } + if (tor_file == NULL) { + tor_file = ConfGetString("tordb.path"); + } + if (tor_file && strcmp(tor_file, "none") == 0) { + tor_file = NULL; + } + if (tor_file) { + if (!CheckPath(tor_file, S_IFREG) || !Init_TorLookup() || !LoadTorTree(tor_file)) { + LogError("Error reading tor info DB file %s", tor_file); + exit(EXIT_FAILURE); + } + outputParams->hasTorDB = true; + } if ((aggregate || flow_stat || print_order) && !Init_FlowCache()) exit(250); if (aggregate && (flow_stat || element_stat)) { diff --git a/src/output/Makefile.am b/src/output/Makefile.am index 1d4cae1c..76613cec 100755 --- a/src/output/Makefile.am +++ b/src/output/Makefile.am @@ -1,5 +1,5 @@ -AM_CPPFLAGS = -I.. -I../include -I../lib -I../decode -I../lib/conf -I../maxmind -I../inline $(DEPS_CFLAGS) +AM_CPPFLAGS = -I.. -I../include -I../lib -I../decode -I../lib/conf -I../maxmind -I../tor -I../inline $(DEPS_CFLAGS) noinst_LIBRARIES = liboutput.a diff --git a/src/output/output.h b/src/output/output.h index c39e69af..6e772dbe 100644 --- a/src/output/output.h +++ b/src/output/output.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, Peter Haag + * Copyright (c) 2024, Peter Haag * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -40,12 +40,15 @@ typedef void (*RecordPrinter_t)(FILE *, recordHandle_t *, int); typedef void (*PrologPrinter_t)(void); typedef void (*EpilogPrinter_t)(void); -enum { MODE_PLAIN = 0, MODE_JSON, MODE_CSV }; +enum { MODE_PLAIN = 0, + MODE_JSON, + MODE_CSV }; typedef struct outputParams_s { bool printPlain; bool doTag; bool quiet; bool hasGeoDB; + bool hasTorDB; int mode; int topN; } outputParams_t; diff --git a/src/output/output_fmt.c b/src/output/output_fmt.c index 446d0121..107d0884 100644 --- a/src/output/output_fmt.c +++ b/src/output/output_fmt.c @@ -53,6 +53,7 @@ #include "nffile.h" #include "nfxV3.h" #include "output_util.h" +#include "tor.h" #include "userio.h" #include "util.h" @@ -261,6 +262,10 @@ static void String_SrcASorganisation(FILE *stream, recordHandle_t *recordHandle) static void String_DstASorganisation(FILE *stream, recordHandle_t *recordHandle); +static void String_SrcTor(FILE *stream, recordHandle_t *recordHandle); + +static void String_DstTor(FILE *stream, recordHandle_t *recordHandle); + static void String_inPayload(FILE *stream, recordHandle_t *recordHandle); static void String_outPayload(FILE *stream, recordHandle_t *recordHandle); @@ -538,6 +543,8 @@ static struct format_token_list_s { {"%dloc", 0, "Dst IP location info", String_DstLocation}, // dst IP geo location info {"%sasn", 0, "Src AS organisation", String_SrcASorganisation}, // src IP AS organistaion string {"%dasn", 0, "Dst AS organisation", String_DstASorganisation}, // dst IP AS organisation string + {"%stor", 0, "STor", String_SrcTor}, // src IP 2 letter tor node info + {"%dtor", 0, "DTor", String_DstTor}, // dst IP 2 letter tor node info {"%lbl", 0, " label", String_Label}, // Flow Label {"%n", 0, "", String_NewLine}, // \n @@ -2112,6 +2119,35 @@ static void String_DstASorganisation(FILE *stream, recordHandle_t *recordHandle) } // End of String_DstASorganisation +static void String_SrcTor(FILE *stream, 
recordHandle_t *recordHandle) { + EXipv4Flow_t *ipv4Flow = (EXipv4Flow_t *)recordHandle->extensionList[EXipv4FlowID]; + EXipv6Flow_t *ipv6Flow = (EXipv6Flow_t *)recordHandle->extensionList[EXipv6FlowID]; + EXgenericFlow_t *genericFlow = (EXgenericFlow_t *)recordHandle->extensionList[EXgenericFlowID]; + char torInfo[4] = ""; // stays empty when the record lacks an address or generic flow extension + if (ipv4Flow && genericFlow) { + LookupV4Tor(ipv4Flow->srcAddr, genericFlow->msecFirst, genericFlow->msecLast, torInfo); + } else if (ipv6Flow && genericFlow) { + LookupV6Tor(ipv6Flow->srcAddr, genericFlow->msecFirst, genericFlow->msecLast, torInfo); + } + fprintf(stream, "%2s", torInfo); + +} // End of String_SrcTor + +static void String_DstTor(FILE *stream, recordHandle_t *recordHandle) { + EXipv4Flow_t *ipv4Flow = (EXipv4Flow_t *)recordHandle->extensionList[EXipv4FlowID]; + EXipv6Flow_t *ipv6Flow = (EXipv6Flow_t *)recordHandle->extensionList[EXipv6FlowID]; + EXgenericFlow_t *genericFlow = (EXgenericFlow_t *)recordHandle->extensionList[EXgenericFlowID]; + + char torInfo[4] = ""; // stays empty when the record lacks an address or generic flow extension + if (ipv4Flow && genericFlow) { + LookupV4Tor(ipv4Flow->dstAddr, genericFlow->msecFirst, genericFlow->msecLast, torInfo); + } else if (ipv6Flow && genericFlow) { + LookupV6Tor(ipv6Flow->dstAddr, genericFlow->msecFirst, genericFlow->msecLast, torInfo); + } + fprintf(stream, "%2s", torInfo); + +} // End of String_DstTor + static void String_ivrf(FILE *stream, recordHandle_t *recordHandle) { EXvrf_t *vrf = (EXvrf_t *)recordHandle->extensionList[EXvrfID]; uint32_t ingress = vrf ? 
vrf->ingressVrf : 0; diff --git a/src/output/output_raw.c b/src/output/output_raw.c index f629b912..5237a136 100644 --- a/src/output/output_raw.c +++ b/src/output/output_raw.c @@ -53,6 +53,7 @@ #include "nfxV3.h" #include "output_util.h" #include "ssl/ssl.h" +#include "tor.h" #include "userio.h" #include "util.h" @@ -147,7 +148,7 @@ static void stringEXgenericFlow(FILE *stream, recordHandle_t *recordHandle, void } // End of EXgenericFlowID -static void stringEXtunIPv4(FILE *stream, EXtunIPv4_t *tunIPv4) { +static void stringEXtunIPv4(FILE *stream, EXtunIPv4_t *tunIPv4, EXgenericFlow_t *genericFlow) { char as[IP_STRING_LEN], ds[IP_STRING_LEN]; uint32_t src = htonl(tunIPv4->tunSrcAddr); @@ -155,18 +156,24 @@ static void stringEXtunIPv4(FILE *stream, EXtunIPv4_t *tunIPv4) { inet_ntop(AF_INET, &src, as, sizeof(as)); inet_ntop(AF_INET, &dst, ds, sizeof(ds)); - char sloc[128], dloc[128]; + char sloc[128], dloc[128], stor[4], dtor[4]; + int srcIsTor = LookupV4Tor(tunIPv4->tunSrcAddr, genericFlow->msecFirst, genericFlow->msecLast, stor + 1); + stor[0] = srcIsTor ? ' ' : '\0'; + int dstIsTor = LookupV4Tor(tunIPv4->tunDstAddr, genericFlow->msecFirst, genericFlow->msecLast, dtor + 1); + dtor[0] = dstIsTor ? ' ' : '\0'; LookupV4Location(tunIPv4->tunSrcAddr, sloc, 128); LookupV4Location(tunIPv4->tunDstAddr, dloc, 128); fprintf(stream, " tun proto = %3u %s\n" - " tun src addr = %16s%s%s\n" - " tun dst addr = %16s%s%s\n", - tunIPv4->tunProto, ProtoString(tunIPv4->tunProto, 0), as, strlen(sloc) ? ": " : "", sloc, ds, strlen(dloc) ? ": " : "", dloc); + " tun src addr = %16s%s%s%s\n" + " tun dst addr = %16s%s%s%s\n", + tunIPv4->tunProto, ProtoString(tunIPv4->tunProto, 0), + as, strlen(sloc) ? ": " : "", sloc, stor, + ds, strlen(dloc) ? 
": " : "", dloc, dtor); } // End of stringEXtunIPv4 -static void stringEXtunIPv6(FILE *stream, EXtunIPv6_t *tunIPv6) { +static void stringEXtunIPv6(FILE *stream, EXtunIPv6_t *tunIPv6, EXgenericFlow_t *genericFlow) { char as[IP_STRING_LEN], ds[IP_STRING_LEN]; uint64_t src[2], dst[2]; @@ -177,14 +184,20 @@ static void stringEXtunIPv6(FILE *stream, EXtunIPv6_t *tunIPv6) { inet_ntop(AF_INET6, &src, as, sizeof(as)); inet_ntop(AF_INET6, &dst, ds, sizeof(ds)); - char sloc[128], dloc[128]; + char sloc[128], dloc[128], stor[4], dtor[4]; + int srcIsTor = LookupV6Tor(tunIPv6->tunSrcAddr, genericFlow->msecFirst, genericFlow->msecLast, stor + 1); + stor[0] = srcIsTor ? ' ' : '\0'; + int dstIsTor = LookupV6Tor(tunIPv6->tunDstAddr, genericFlow->msecFirst, genericFlow->msecLast, dtor + 1); + dtor[0] = dstIsTor ? ' ' : '\0'; LookupV6Location(tunIPv6->tunSrcAddr, sloc, 128); LookupV6Location(tunIPv6->tunDstAddr, dloc, 128); fprintf(stream, " tun proto = %3u %s\n" - " tun src addr = %16s%s%s\n" - " tun dst addr = %16s%s%s\n", - tunIPv6->tunProto, ProtoString(tunIPv6->tunProto, 0), as, strlen(sloc) ? ": " : "", sloc, ds, strlen(dloc) ? ": " : "", dloc); + " tun src addr = %16s%s%s%s\n" + " tun dst addr = %16s%s%s%s\n", + tunIPv6->tunProto, ProtoString(tunIPv6->tunProto, 0), + as, strlen(sloc) ? ": " : "", sloc, stor, + ds, strlen(dloc) ? 
": " : "", dloc, dtor); } // End of stringEXtunIPv6 @@ -192,11 +205,12 @@ static void stringsEXipv4Flow(FILE *stream, recordHandle_t *recordHandle, void * EXipv4Flow_t *ipv4Flow = (EXipv4Flow_t *)extensionRecord; EXtunIPv4_t *tunIPv4 = (EXtunIPv4_t *)recordHandle->extensionList[EXtunIPv4ID]; EXtunIPv6_t *tunIPv6 = (EXtunIPv6_t *)recordHandle->extensionList[EXtunIPv6ID]; + EXgenericFlow_t *genericFlow = (EXgenericFlow_t *)recordHandle->extensionList[EXgenericFlowID]; if (tunIPv4) - stringEXtunIPv4(stream, tunIPv4); + stringEXtunIPv4(stream, tunIPv4, genericFlow); else if (tunIPv6) - stringEXtunIPv6(stream, tunIPv6); + stringEXtunIPv6(stream, tunIPv6, genericFlow); uint32_t src = htonl(ipv4Flow->srcAddr); uint32_t dst = htonl(ipv4Flow->dstAddr); @@ -205,13 +219,18 @@ static void stringsEXipv4Flow(FILE *stream, recordHandle_t *recordHandle, void * inet_ntop(AF_INET, &src, as, sizeof(as)); inet_ntop(AF_INET, &dst, ds, sizeof(ds)); - char sloc[128], dloc[128]; + char sloc[128], dloc[128], stor[4], dtor[4]; + int srcIsTor = LookupV4Tor(ipv4Flow->srcAddr, genericFlow->msecFirst, genericFlow->msecLast, stor + 1); + stor[0] = srcIsTor ? ' ' : '\0'; + int dstIsTor = LookupV4Tor(ipv4Flow->dstAddr, genericFlow->msecFirst, genericFlow->msecLast, dtor + 1); + dtor[0] = dstIsTor ? ' ' : '\0'; LookupV4Location(ipv4Flow->srcAddr, sloc, 128); LookupV4Location(ipv4Flow->dstAddr, dloc, 128); fprintf(stream, - " src addr = %16s%s%s\n" - " dst addr = %16s%s%s\n", - as, strlen(sloc) ? ": " : "", sloc, ds, strlen(dloc) ? ": " : "", dloc); + " src addr = %16s%s%s%s\n" + " dst addr = %16s%s%s%s\n", + as, strlen(sloc) ? ": " : "", sloc, stor, + ds, strlen(dloc) ? 
": " : "", dloc, dtor); } // End of stringsEXipv4Flow @@ -219,11 +238,12 @@ static void stringsEXipv6Flow(FILE *stream, recordHandle_t *recordHandle, void * EXipv6Flow_t *ipv6Flow = (EXipv6Flow_t *)extensionRecord; EXtunIPv4_t *tunIPv4 = (EXtunIPv4_t *)recordHandle->extensionList[EXtunIPv4ID]; EXtunIPv6_t *tunIPv6 = (EXtunIPv6_t *)recordHandle->extensionList[EXtunIPv6ID]; + EXgenericFlow_t *genericFlow = (EXgenericFlow_t *)recordHandle->extensionList[EXgenericFlowID]; if (tunIPv4) - stringEXtunIPv4(stream, tunIPv4); + stringEXtunIPv4(stream, tunIPv4, genericFlow); - else if (tunIPv4) - stringEXtunIPv6(stream, tunIPv6); + else if (tunIPv6) // IPv6 tunnel extension present + stringEXtunIPv6(stream, tunIPv6, genericFlow); uint64_t src[2], dst[2]; src[0] = htonll(ipv6Flow->srcAddr[0]); @@ -235,13 +255,18 @@ static void stringsEXipv6Flow(FILE *stream, recordHandle_t *recordHandle, void * inet_ntop(AF_INET6, &src, as, sizeof(as)); inet_ntop(AF_INET6, &dst, ds, sizeof(ds)); - char sloc[128], dloc[128]; + char sloc[128], dloc[128], stor[4], dtor[4]; + int srcIsTor = LookupV6Tor(ipv6Flow->srcAddr, genericFlow->msecFirst, genericFlow->msecLast, stor + 1); + stor[0] = srcIsTor ? ' ' : '\0'; + int dstIsTor = LookupV6Tor(ipv6Flow->dstAddr, genericFlow->msecFirst, genericFlow->msecLast, dtor + 1); + dtor[0] = dstIsTor ? ' ' : '\0'; LookupV6Location(ipv6Flow->srcAddr, sloc, 128); LookupV6Location(ipv6Flow->dstAddr, dloc, 128); fprintf(stream, - " src addr = %16s%s%s\n" - " dst addr = %16s%s%s\n", - as, strlen(sloc) ? ": " : "", sloc, ds, strlen(dloc) ? ": " : "", dloc); + " src addr = %16s%s%s%s\n" + " dst addr = %16s%s%s%s\n", + as, strlen(sloc) ? ": " : "", sloc, stor, + ds, strlen(dloc) ? ": " : "", dloc, dtor); } // End of stringsEXipv6Flow diff --git a/src/tor/Makefile.am b/src/tor/Makefile.am new file mode 100755 index 00000000..747b7304 --- /dev/null +++ b/src/tor/Makefile.am @@ -0,0 +1,18 @@ + +AM_CPPFLAGS = -I.. 
-I../include -I../lib -I../lib/conf -I../inline $(DEPS_CFLAGS) + +noinst_LIBRARIES = libtorlookup.a + +if TORLOOKUP +bin_PROGRAMS = torlookup +bin_SCRIPTS = updateTorDB.sh +endif + +libtorlookup_a_SOURCES = tor.c tor.h + +if TORLOOKUP +torlookup_SOURCES = torlookup.c +torlookup_LDADD = ../lib/libnfdump.la libtorlookup.a +endif + +CLEANFILES = *.gch diff --git a/src/tor/tor.c b/src/tor/tor.c new file mode 100644 index 00000000..d0fb9a72 --- /dev/null +++ b/src/tor/tor.c @@ -0,0 +1,344 @@ +/* + * Copyright (c) 2024, Peter Haag + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of the author nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "tor.h" + +#include +#include +#include +#include +#include +#include + +#include "nffile.h" +#include "nffileV2.h" +#include "nfxV3.h" +#include "util.h" + +// include after +#include "kbtree.h" +#include "nffile_inline.c" + +static inline int torNodeCMP(torNode_t a, torNode_t b) { + if (a.ipaddr == b.ipaddr) return 0; + return a.ipaddr > b.ipaddr ? 1 : -1; +} + +KBTREE_INIT(torTree, torNode_t, torNodeCMP); + +static kbtree_t(torTree) *torTree = NULL; + +// returns ok +int Init_TorLookup(void) { + torTree = kb_init(torTree, KB_DEFAULT_SIZE); + + return 1; +} // End of Init_TorLookup + +static char *tmString(time_t time, char *buff, size_t len) { + struct tm *tmTime = localtime(&time); + snprintf(buff, len, "%4d-%02d-%02d %02d:%02d:%02d", tmTime->tm_year + 1900, tmTime->tm_mon + 1, tmTime->tm_mday, + tmTime->tm_hour, tmTime->tm_min, tmTime->tm_sec); + return buff; +} + +static void printTorNode(torNode_t *node) { + char first[32], last[32], published[32]; + char ip[32]; + uint32_t torIP = ntohl(node->ipaddr); + inet_ntop(PF_INET, &torIP, ip, sizeof(ip)); + printf("Node: %s, last published: %s, intervals: %d\n", ip, + tmString(node->lastPublished, published, sizeof(published)), node->gaps + 1); + for (int i = 0; i <= node->intervalIndex; i++) { + printf(" %d first: %s, last: %s\n", i, tmString(node->interval[i].firstSeen, first, sizeof(first)), + tmString(node->interval[i].lastSeen, last, sizeof(last))); + } +} + 
+/* + +Published +A node publishes as soon as it passes it's self test +Can also be read as meaning last published +A descriptor is published when changed or 18hrs passed +Doesn't consider if the node went down, for example the vm paused/network interruptions +Uptime must reset (to trigger a re-publish) by restart + +LastStatus +Can be read as meaning the node is active, has the exit flag, and not marked bad +The exit is allowed to be bad when consensus method is less than 11 +So in the interval [Published, LastStatus] having Published < LastStatus this means + +the self test was passed, consensus was taken, and an exit-policy was published with the descriptor +the authority managed to connect to this node in the last 45 minutes from LastStatus +Be careful where Published > LastStatus such as this case + +Published 2014-12-22 22:00:29 +LastStatus 2014-12-22 20:03:07 +ExitAddress xx.xx.xxx.xx 2014-12-22 22:14:11 + +The node may have given up the Exit flag, or may have gotten BadExit. Tor has many transient qualities so things like +this happen. The node was tested after re-publishing it's descriptor but didn't get consensus (but was still in the +cache of TorDNSEL). + +A similar argument applies in the interval [Published, Test] except you know the node was an exit at the time of the +test. Consider another example from the CollecTor page. + +Published 2010-12-28 07:35:55 +LastStatus 2010-12-28 08:10:11 +ExitAddress 91.102.152.236 2010-12-28 07:10:30 +ExitAddress 91.102.152.227 2010-12-28 10:35:30 + +This node last updated it's descriptor at 07:35:55. It did this after the test at 7:10:30. The last consensus was at +08:10:11 and the last test was at 10:35:30. What conclusions can be drawn about both exit ip's? The first ip was +internal to tor and was also the exit. The second was found to be an exit. Maybe they've only got one ip address and it +changed. Maybe they run a multi-homed node. 
The point is what we know (and can guarantee) is limited compared to what we +can try to guess (and may be right but certainly not in general). + +Even in case I can get no guarantees, I'd be happy to know if I can at least get a time interval for which the address +was probably an exit node. So then what's the answer? It's this -- hope for the best. A node that isn't in the consensus +can (but probably shouldn't) still be tested. For an arbitrary entry any time after the LastStatus (minus a max of 45 +minutes) until you take the consensus as expired is such an interval. Depending on how you consider expired consensus +that means 1 - 3 hours (up to fresh-until, or, not past valid-until). + +tl;dr, for a given LastStatus, 1 - 3 hours depending on your preferred view of expired consensus. This is based on +hoping for the best possible scenario in general. It would be best to consider the history of an exit across as many +sample points as possible. +*/ +void UpdateTorNode(torNode_t *torNode) { + torNode_t *node = kb_getp(torTree, torTree, torNode); + if (node) { + int index = node->intervalIndex; + // printf("node updated\n"); + if (node->lastPublished <= torNode->lastPublished) { + time_t diffPublishTime = torNode->lastPublished - node->lastPublished; + if (diffPublishTime > (24 * 3600)) { + time_t diffLastSeen = torNode->interval[0].lastSeen - node->interval[index].lastSeen; + if (diffLastSeen > (24 * 3600)) { + dbg_printf("Last published gap > 18h %ld\n", diffPublishTime / 3600); + node->gaps++; + node->intervalIndex = (node->intervalIndex + 1) % MAXINTERVALS; + index = node->intervalIndex; + dbg_printf("Not seen in 24h - %ld. 
%d gaps, index: %d\n", diffLastSeen / 3600, node->gaps, index); + node->interval[index].firstSeen = torNode->lastPublished; + } + } + + node->lastPublished = torNode->lastPublished; + if (torNode->interval[0].lastSeen > node->interval[index].lastSeen) + node->interval[index].lastSeen = torNode->interval[0].lastSeen; + if (torNode->interval[0].firstSeen < node->interval[index].firstSeen) abort(); + } +#ifdef DEVEL + if (print) { + printTorNode(node); + printTorNode(torNode); + printf("--\n\n"); + } +#endif + } else { + torNode->interval[0].firstSeen = torNode->lastPublished; + kb_putp(torTree, torTree, torNode); + // printf("node inserted\n"); + } +} + +int SaveTorTree(char *fileName) { + nffile_t *nffile = OpenNewFile(fileName, NULL, CREATOR_TORLOOKUP, LZ4_COMPRESSED, NOT_ENCRYPTED); + if (nffile == NULL) return 0; // output file could not be created - nothing to write + void *outBuff = nffile->buff_ptr; + size_t size = 0; + + kbitr_t itr; + kb_itr_first(torTree, torTree, &itr); // get an iterator pointing to the first + for (; kb_itr_valid(&itr); kb_itr_next(torTree, torTree, &itr)) { // move on + torNode_t *torNode = &kb_itr_key(torNode_t, &itr); + dbg_printf("ip: %u, first: %ld, last: %ld\n", torNode->ipaddr, torNode->firstSeen, torNode->lastSeen); + if (size < sizeof(torNode_t)) { + nffile->buff_ptr = (void *)outBuff; + size = CheckBufferSpace(nffile, sizeof(torNode_t)); + + // make it an array block + nffile->block_header->type = DATA_BLOCK_TYPE_4; + + outBuff = nffile->buff_ptr; + recordHeader_t *arrayHeader = (recordHeader_t *)outBuff; + // set array element info + arrayHeader->type = TorTreeElementID; + arrayHeader->size = sizeof(torNode_t); + nffile->block_header->size += sizeof(recordHeader_t); + size -= sizeof(recordHeader_t); + outBuff += sizeof(recordHeader_t); + } + memcpy(outBuff, torNode, sizeof(torNode_t)); + outBuff += sizeof(torNode_t); + size -= sizeof(torNode_t); + nffile->block_header->size += sizeof(torNode_t); + nffile->block_header->NumRecords++; + } + + return CloseUpdateFile(nffile); + +} // End of SaveTorTree + +int 
LoadTorTree(char *fileName) { + dbg_printf("Load TorNode DB file %s\n", fileName); + nffile_t *nffile = OpenFile(fileName, NULL); + if (!nffile) { + return 0; + } + + int done = 0; + while (!done) { + // get next data block from file + int ret = ReadBlock(nffile); + + switch (ret) { + case NF_CORRUPT: + case NF_ERROR: + if (ret == NF_CORRUPT) + LogError("Skip corrupt data file '%s'\n", nffile->fileName); + else + LogError("Read error in file '%s': %s\n", nffile->fileName, strerror(errno)); + // fall through - get next file in chain + case NF_EOF: + done = 1; + continue; + break; // not really needed + } + + dbg_printf("Next block. type: %u, size: %u\n", nffile->block_header->type, nffile->block_header->size); + if (nffile->block_header->type != DATA_BLOCK_TYPE_4) { + LogError("Can't process block type %u. Skip block.\n", nffile->block_header->type); + continue; + } + + record_header_t *arrayHeader = nffile->buff_ptr; + void *arrayElement = (void *)nffile->buff_ptr + sizeof(record_header_t); + size_t expected = (arrayHeader->size * nffile->block_header->NumRecords) + sizeof(record_header_t); + if (expected != nffile->block_header->size) { + LogError("Array size calculated: %u != expected: %u for element: %u", expected, nffile->block_header->size, + arrayHeader->type); + continue; + } + + switch (arrayHeader->type) { + case TorTreeElementID: { + torNode_t *torNode = (torNode_t *)arrayElement; + for (int i = 0; i < nffile->block_header->NumRecords; i++) { + torNode_t *node = kb_getp(torTree, torTree, torNode); + if (node) { + LogError("Duplicate IP node: ip: 0x%x", torNode->ipaddr); + } else { + kb_putp(torTree, torTree, torNode); + } + torNode++; + } + } break; + default: + LogError("Skip unknown array element: %u", arrayHeader->type); + } + } + DisposeFile(nffile); + + return 1; +} // End of LoadMaxMind + +// return 1 - if IP is tor exit node +// input nfdump IP addr, first/last in msec +int LookupV4Tor(uint32_t ip, uint64_t first, uint64_t last, char *torInfo) { + 
if (!torTree) { + torInfo[0] = '\0'; + return 0; + } + + torNode_t searchNode = {.ipaddr = ip}; + torNode_t *torNode = kb_getp(torTree, torTree, &searchNode); + if (torNode) { + first /= 1000; + last /= 1000; + for (int i = 0; i <= torNode->intervalIndex; i++) { + // allow 24h over last seen + time_t graceLastSeen = torNode->interval[i].lastSeen + 24 * 3600; + if ((first >= torNode->interval[i].firstSeen && first <= graceLastSeen) || + (last >= torNode->interval[i].firstSeen && last <= graceLastSeen)) { + torInfo[0] = 'T'; + torInfo[1] = 'X'; + torInfo[2] = '\0'; + return 1; + } + } + torInfo[0] = 't'; + torInfo[1] = 'x'; + torInfo[2] = '\0'; + return 1; + } else { + // nothing found + torInfo[0] = '.'; + torInfo[1] = '.'; + torInfo[2] = '\0'; + } + + return 0; + +} // End of LookupTor + +int LookupV6Tor(uint64_t ip[2], uint64_t first, uint64_t last, char *torInfo) { + if (!torTree) { + torInfo[0] = '\0'; + return 0; + } + + // IPv6 not yet implemented + torInfo[0] = '.'; + torInfo[1] = '.'; + torInfo[2] = '\0'; + + return 0; + +} // End of LookupTor + +void LookupIP(char *ipstring) { + if (!torTree) { + printf("No torDB available"); + return; + } + // IPv4 + uint32_t ip; + int ret = inet_pton(PF_INET, ipstring, &ip); + if (ret != 1) return; + torNode_t searchNode = {.ipaddr = ntohl(ip)}; + torNode_t *torNode = kb_getp(torTree, torTree, &searchNode); + if (torNode) { + printTorNode(torNode); + } else { + printf("No tor exit node: %s\n", ipstring); + } +} diff --git a/src/tor/tor.h b/src/tor/tor.h new file mode 100644 index 00000000..5e6f7089 --- /dev/null +++ b/src/tor/tor.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2021, Peter Haag + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of the author nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *
+ */
+
+#ifndef _TOR_H
+#define _TOR_H 1
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <time.h>
+
+#include "config.h"
+#include "kbtree.h"
+
+typedef struct interval_s {
+    time_t firstSeen;  // start of the interval: publish time of the record which opened it (UNIX epoch)
+    time_t lastSeen;   // latest LastStatus/ExitAddress time merged into the interval (UNIX epoch)
+} interval_t;
+
+#define MAXINTERVALS 8  // interval slots per node, the oldest slot is reused
+
+typedef struct torNode_s {
+    uint32_t ipaddr;                    // IPv4 exit address, host byte order
+    uint16_t gaps;                      // number of gaps > 24h seen for this address
+    uint16_t intervalIndex;             // slot of the current interval, wraps around at MAXINTERVALS
+    time_t lastPublished;               // publish time of the newest record merged (UNIX epoch)
+    interval_t interval[MAXINTERVALS];  // periods in which the address was listed
+} torNode_t;
+
+#define TorTreeElementID 6  // array element type of a torNode_t in the torDB file
+
+int Init_TorLookup(void);  // NOTE(review): implementation not visible here - presumably sets up the tor tree
+
+void UpdateTorNode(torNode_t *torNode);  // insert a new node or merge the record into the existing node
+
+int LoadTorTree(char *fileName);  // load a torDB file into the tree. returns 0 if the file can not be opened
+
+int SaveTorTree(char *fileName);  // write the tree into a new torDB file
+
+int LookupV4Tor(uint32_t ip, uint64_t first, uint64_t last, char *torInfo);  // first/last in msec. returns 1 for a known exit node
+
+int LookupV6Tor(uint64_t ip[2], uint64_t first, uint64_t last, char *torInfo);  // IPv6 not yet implemented. returns 0
+
+void LookupIP(char *ipstring);  // look up a dotted decimal IPv4 string and print the result
+
+#endif
diff --git a/src/tor/torlookup.c b/src/tor/torlookup.c
new file mode 100644
index 00000000..943dcafe
--- /dev/null
+++ b/src/tor/torlookup.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2024, Peter Haag
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * * Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <arpa/inet.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fts.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "nfconf.h"
+#include "nffile.h"
+#include "tor.h"
+#include "util.h"
+
+#define TAG_EXITNODE "ExitNode"
+#define TAG_PUBLISHED "Published"
+#define TAG_LASTSTATUS "LastStatus"
+#define TAG_EXITADDRESS "ExitAddress"
+
+static void usage(char *name);
+
+static int compare(const FTSENT **f1, const FTSENT **f2);
+
+static int epoch_days(int y, int m, int d);
+
+static time_t hms_to_time(int h, int m, int s);
+
+static time_t ReadTime(char *timestring);
+
+static int scanLine(char *line, torNode_t *torNode);
+
+static int processFile(char *torFile);
+
+static int traverseTree(char *const argv[]);
+
+// print the command line help
+static void usage(char *name) {
+    printf(
+        "usage %s [options] \n"
+        "-h\t\tthis text you see right here.\n"
+        "-T <file>\ttor nodeDB in nfdump format to lookup tor info.\n"
+        "-d <dir>\tDirectory containing ascii tor info files to be convert into nfdump tor nodeDB.\n"
+        "-w <file>\tName of nfdump torDB file.\n",
+        name);
+}  // End of usage
+
+// parse integer from string, up to eos char
+// update string after reading: *timeString points behind the eos char
+// or is NULL, if the string is exhausted. The eos char is overwritten.
+// returns 0, if the field is missing or contains a non digit char
+static inline int getNumber(char **timeString, char eos) {
+    char *s = *timeString;
+    // a previous call consumed the last field of a truncated time string
+    if (s == NULL) return 0;
+
+    char *eosp = strchr(s, eos);
+    if (eosp) *eosp++ = '\0';
+    // always move on, otherwise a bad field is parsed again by the next call
+    *timeString = eosp;
+
+    int number = 0;
+    for (; *s != '\0'; s++) {
+        if (*s < '0' || *s > '9') return 0;
+        number = 10 * number + (*s - '0');
+    }
+    return number;
+}  // End of getNumber
+
+/*
+ * we would need timegm(), but that function is not portable and too slow
+ * so we take a local implementation to convert a time string to UNIX epoch
+ *
+ * returns the number of days between 1970-01-01 and the date y-m-d
+ */
+static int epoch_days(int y, int m, int d) {
+    const uint32_t year_base = 4800; /* Before min year, multiple of 400. */
+    const uint32_t m_adj = m - 3;    /* March-based month. */
+    /* m_adj wrapped around for January and February */
+    const uint32_t carry = m_adj > (uint32_t)m ? 1 : 0;
+    const uint32_t adjust = carry ? 12 : 0;
+    const uint32_t y_adj = y + year_base - carry;
+    const uint32_t month_days = ((m_adj + adjust) * 62719 + 769) / 2048;
+    const uint32_t leap_days = y_adj / 4 - y_adj / 100 + y_adj / 400;
+    return y_adj * 365 + leap_days + month_days + (d - 1) - 2472632;
+}  // End of epoch_days
+
+// seconds since midnight
+static time_t hms_to_time(int h, int m, int s) { return (h * 3600) + (m * 60) + s; }
+
+// expected time format "%Y-%m-%d %H:%M:%S"
+// example: 2010-12-28 07:35:55
+// the string is modified while parsing. returns the time as UNIX epoch
+static time_t ReadTime(char *timestring) {
+    int year = getNumber(&timestring, '-');
+    int mon = getNumber(&timestring, '-');
+    int mday = getNumber(&timestring, ' ');
+    int hour = getNumber(&timestring, ':');
+    int min = getNumber(&timestring, ':');
+    int sec = getNumber(&timestring, '\n');
+
+    // calculate in time_t - an int overflows for dates after 2038-01-19
+    time_t epoch = (time_t)86400 * epoch_days(year, mon, mday);
+    epoch += hms_to_time(hour, min, sec);
+
+    return epoch;
+}  // End of ReadTime
+
+// returns the text following "<tag> ", if the line starts with tag
+// returns NULL otherwise
+static char *tagValue(char *line, const char *tag) {
+    size_t len = strlen(tag);
+    if (strncmp(line, tag, len) != 0 || line[len] != ' ') return NULL;
+    return line + len + 1;
+}  // End of tagValue
+
+/*
+ * Process one line of an exit list file and collect its data in torNode.
+ * ExitNode starts a new record, Published and LastStatus add the time
+ * stamps, ExitAddress adds the IP and completes the record.
+ *
+ * returns 1, if the record is complete and may be added to the tree
+ * returns 0 for any other line
+ */
+static int scanLine(char *line, torNode_t *torNode) {
+    char *value;
+    if (tagValue(line, TAG_EXITNODE) != NULL) {
+        memset((void *)torNode, 0, sizeof(torNode_t));
+    } else if ((value = tagValue(line, TAG_PUBLISHED)) != NULL) {
+        torNode->lastPublished = ReadTime(value);
+        torNode->interval[0].firstSeen = torNode->lastPublished;
+    } else if ((value = tagValue(line, TAG_LASTSTATUS)) != NULL) {
+        time_t lastStatus = ReadTime(value);
+        if (lastStatus > torNode->interval[0].lastSeen) torNode->interval[0].lastSeen = lastStatus;
+    } else if ((value = tagValue(line, TAG_EXITADDRESS)) != NULL) {
+        // ExitAddress <ip> <date> <time>
+        char *ipstring = value;
+        char *timestring = strchr(ipstring, ' ');
+        if (timestring == NULL) {
+            LogError("Missing time stamp for IP address: %s", ipstring);
+            return 0;
+        }
+        *timestring++ = '\0';
+        uint32_t ip = 0;
+        int ret = inet_pton(PF_INET, ipstring, &ip);
+        if (ret == 1) {
+            // inet_pton returns network byte order, the tree uses host byte order
+            torNode->ipaddr = ntohl(ip);
+            time_t lastSeen = ReadTime(timestring);
+            if (lastSeen > torNode->interval[0].lastSeen) torNode->interval[0].lastSeen = lastSeen;
+            return 1;
+        } else {
+            LogError("Unparsable IP address: %s", ipstring);
+        }
+    }
+    return 0;
+}  // End of scanLine
+
+/*
+ * Read an exit list file and merge all records into the tor tree.
+ * returns 0 on success or errno, if the file can not be opened
+ */
+static int processFile(char *torFile) {
+    FILE *fp = fopen(torFile, "r");
+    if (fp == NULL) return errno;
+
+    char *line = NULL;
+    size_t len = 0;
+    torNode_t torNode = {0};
+    while (getline(&line, &len, fp) != -1) {
+        // printf("Next line: %s", line);
+        if (scanLine(line, &torNode)) {
+            UpdateTorNode(&torNode);
+        }
+    }
+
+    fclose(fp);
+    free(line);
+
+    return 0;
+}  // End of processFile
+
+// sort the directory entries by name
+static int compare(const FTSENT **f1, const FTSENT **f2) { return strcmp((*f1)->fts_name, (*f2)->fts_name); }  // End of compare
+
+/*
+ * Walk the directory trees in argv and process all regular files.
+ * returns ok: 1 if all files were processed, 0 on error
+ */
+static int traverseTree(char *const argv[]) {
+    const char spinner[4] = {'|', '/', '-', '\\'};
+    int fts_options = FTS_COMFOLLOW | FTS_LOGICAL | FTS_NOCHDIR;
+
+    // make stdout unbuffered for progress pointer
+    setvbuf(stdout, (char *)NULL, _IONBF, 0);
+
+    FTS *ftsp = fts_open(argv, fts_options, compare);
+    if (ftsp == NULL) {
+        LogError("fts_open(): %s:", strerror(errno));
+        return 0;
+    }
+    /* Initialize ftsp with as many argv[] parts as possible. */
+    if (fts_children(ftsp, 0) == NULL) {
+        LogError("fts_open(): %s:", "No files found");
+        fts_close(ftsp);
+        return 0; /* no files to traverse */
+    }
+
+    int ok = 1;
+    int cnt = 0;
+    FTSENT *p;
+    while (ok && (p = fts_read(ftsp)) != NULL) {
+        switch (p->fts_info) {
+            case FTS_D:
+                dbg_printf("d %s\n", p->fts_path);
+                break;
+            case FTS_F: {
+                printf("\r%c", spinner[cnt & 0x3]);
+                cnt++;
+                dbg_printf(" f %s\n", p->fts_path);
+                int err = processFile(p->fts_path);
+                if (err) {
+                    // returning errno here would be taken as ok by the caller
+                    LogError("Can't process file '%s': %s", p->fts_path, strerror(err));
+                    ok = 0;
+                }
+            } break;
+            default:
+                break;
+        }
+    }
+    fts_close(ftsp);
+    return ok;
+}  // End of traverseTree
+
+// Return a pointer to the trimmed string
+// trailing white space is cut off in place
+static char *string_trim(char *s) {
+    while (isspace((unsigned char)*s)) s++;
+    if (*s) {
+        char *p = s;
+        while (*p) p++;
+        while (isspace((unsigned char)*(--p)))
+            ;
+        p[1] = '\0';
+    }
+
+    return s;
+}  // end of string_trim
+
+// returns 1, if s is an IPv4 address in dotted decimal notation
+// same check as used by LookupIP(), so every accepted string can be looked up
+static int valid_ipv4(char *s) {
+    struct in_addr addr;
+    return inet_pton(AF_INET, s, &addr) == 1;
+}  // End of valid_ipv4
+
+/*
+ * torlookup -d <dir> -w <file>: build a torDB file from the exit lists in dir
+ * torlookup [-T <file>] [ip ..]: look up the IPs given as arguments,
+ * or all IPs found in the lines read from stdin
+ */
+int main(int argc, char **argv) {
+    char *dirName = NULL;
+    char *torFileDB = getenv("NFTORDB");
+    char *wfile = "torDB.nf";
+    int c;
+    // -T as documented, -H kept for compatibility
+    while ((c = getopt(argc, argv, "hd:H:T:w:")) != EOF) {
+        switch (c) {
+            case 'h':
+                usage(argv[0]);
+                exit(EXIT_SUCCESS);
+                break;
+            case 'd':
+                if (!CheckPath(optarg, S_IFDIR)) exit(EXIT_FAILURE);
+                dirName = strdup(optarg);
+                break;
+            case 'w':
+                wfile = optarg;
+                break;
+            case 'H':
+            case 'T':
+                if (!CheckPath(optarg, S_IFREG)) exit(EXIT_FAILURE);
+                torFileDB = strdup(optarg);
+                break;
+            default:
+                usage(argv[0]);
+                exit(EXIT_FAILURE);
+        }
+    }
+
+    if (!Init_nffile(1, NULL) || !Init_TorLookup()) exit(EXIT_FAILURE);
+
+    // build mode
+    if (dirName && wfile) {
+        char *pathList[2] = {dirName, NULL};
+        if (traverseTree(pathList) == 0 || SaveTorTree(wfile) == 0) {
+            exit(EXIT_FAILURE);
+        }
+        exit(EXIT_SUCCESS);
+    }
+
+    // lookup mode
+    if (torFileDB == NULL) {
+        if (ConfOpen(NULL, "nfdump") < 0) exit(EXIT_FAILURE);
+        torFileDB = ConfGetString("tordb.path");
+    }
+
+    if (torFileDB == NULL) {
+        LogError("Missing nfdump tor DB. -T or NFTORDB env required");
+        exit(EXIT_FAILURE);
+    }
+
+    if (!LoadTorTree(torFileDB)) {
+        LogError("Failed to load nfdump tor DB");
+        exit(EXIT_FAILURE);
+    }
+
+    if (argc - optind > 0) {
+        while (argc - optind > 0) {
+            char *arg = argv[optind++];
+            if (valid_ipv4(arg)) {
+                LookupIP(arg);
+            } else {
+                LogError("Not a valid IPv4 or IPv6: %s", arg);
+                exit(EXIT_FAILURE);
+            }
+        }
+    } else {
+        char *line = NULL;
+        size_t linecap = 0;
+        ssize_t lineLen;
+        // read each line - trim \n
+        while ((lineLen = getline(&line, &linecap, stdin)) > 0) {
+            if (lineLen > 1024) {
+                LogError("Line length error");
+                exit(EXIT_FAILURE);
+            }
+            // the last line may come without a \n
+            line[strcspn(line, "\n")] = '\0';
+
+            // split ' ' separated words and check, if it's an IPv4/v6
+            char *sep = " ";
+            char *word, *brkt;
+            word = strtok_r(line, sep, &brkt);
+            while (word) {
+                // trim first - otherwise a word with a trailing tab or \r never validates
+                char *ip = string_trim(word);
+                if (valid_ipv4(ip)) {
+                    LookupIP(ip);
+                }
+                word = strtok_r(NULL, sep, &brkt);
+            }
+        }
+        free(line);
+    }
+
+    exit(EXIT_SUCCESS);
+}
diff --git a/src/tor/updateTorDB.sh b/src/tor/updateTorDB.sh
new file mode 100755
index 00000000..6683245e
--- /dev/null
+++ b/src/tor/updateTorDB.sh
@@ -0,0 +1,98 @@
+#!/bin/sh
+
+# Script to update nfdump torDB.
+
+# Use the nfdump tor DB environment variable NFTORDB if set
+# or put the default name in the current directory
+NFTORDB=${NFTORDB:="./tordb.nf"}
+
+# Fetch all tor files from the last n months.
+# Days do not matter only full months are taken.
+# The current month counts also as full month
+# This default value may be overwritten on the command line
+NUM_MONTHS=6
+
+# Tor exit node URL
+EXIT_URL="https://collector.torproject.org/archive/exit-lists"
+
+# Usage info - always terminates the script with a failure status
+usage () {
+    echo "Usage : $1 [num]"
+    echo 'Fetch tor exit node list from last [num] months and create the nfdump tor lookup DB.'
+    echo '[num] is optional and defaults to 6 months'
+    exit 1
+}
+
+# Fetch and unpack the monthly tor files into the current directory
+# $1: number of months to fetch, the current month is the first one
+fetch_files() {
+    current_year=$(date +"%Y")
+    current_month=$(date +"%m")
+    # strip the leading zero: 08 and 09 are invalid octal numbers in $(( ))
+    current_month=${current_month#0}
+    # count in absolute months, so any number of year boundaries may be crossed
+    newest=$((current_year * 12 + current_month - 1))
+    i=$(($1 - 1))
+    # plain countdown: 'seq $n 0' prints nothing with GNU seq
+    while [ "$i" -ge 0 ]; do
+        total=$((newest - i))
+        year=$((total / 12))
+        month=$((total % 12 + 1))
+        if [ "$month" -lt 10 ]; then
+            month="0${month}"
+        fi
+        archive="exit-list-$year-$month.tar.xz"
+        printf 'Fetch %s: .. ' "$archive"
+        if wget -q "${EXIT_URL}/${archive}" && [ -f "$archive" ] && tar Jxf "$archive"; then
+            echo OK.
+        else
+            echo failed.
+        fi
+        rm -f "$archive"
+        i=$((i - 1))
+    done
+}
+
+##
+# Main starts here
+##
+
+if [ $# -gt 1 ]; then
+    usage "$0"
+fi
+
+# Only accept numbers
+if [ $# -eq 1 ]; then
+    case $1 in
+        ''|*[!0-9]*)
+            echo "Argument not a positive number"
+            usage "$0"
+            ;;
+        *)
+            # expr reads decimal, so a leading zero does no harm later in $(( ))
+            NUM_MONTHS=$(expr "$1" + 0)
+            ;;
+    esac
+fi
+
+if [ "$NUM_MONTHS" -le 0 ] || [ "$NUM_MONTHS" -gt 24 ]; then
+    echo "Number of months: $NUM_MONTHS out of 1..24"
+    exit 1
+fi
+
+echo "Get tor node exit list for the last $NUM_MONTHS months"
+
+# private tmp data dir
+WORKDIR=$(mktemp -d "${TMPDIR:-/tmp}/tor_raw_data.XXXXXX") || exit 1
+
+# download in a subshell, the current directory of the script stays untouched
+(cd "$WORKDIR" && fetch_files "$NUM_MONTHS") || { rm -rf "$WORKDIR"; exit 1; }
+
+echo "Building nfdump tordb: $NFTORDB"
+if torlookup -d "$WORKDIR" -w "$NFTORDB"; then
+    rm -rf "$WORKDIR"
+    echo Done.
+else
+    echo "torlookup failed - raw data kept in $WORKDIR"
+    exit 1
+fi