Skip to content
This repository has been archived by the owner on Jan 29, 2021. It is now read-only.

Commit

Permalink
Merge pull request #70 from activecm/57-disable-db-rotation
Browse files Browse the repository at this point in the history
Disable Database Rotation For Testing Purposes
  • Loading branch information
SamuelCarroll authored Jan 11, 2019
2 parents 0c19396 + a6dd4fe commit ee1c0a5
Show file tree
Hide file tree
Showing 5 changed files with 281 additions and 23 deletions.
62 changes: 43 additions & 19 deletions converter/commands/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,31 @@ import (
"github.com/activecm/ipfix-rita/converter/environment"
input "github.com/activecm/ipfix-rita/converter/input/mgologstash"
"github.com/activecm/ipfix-rita/converter/logging"
"github.com/activecm/ipfix-rita/converter/output/rita/streaming/dates"
"github.com/activecm/ipfix-rita/converter/output"
batchRITAOutput "github.com/activecm/ipfix-rita/converter/output/rita/batch/dates"
streamingRITAOutput "github.com/activecm/ipfix-rita/converter/output/rita/streaming/dates"
"github.com/activecm/ipfix-rita/converter/stitching"
"github.com/benbjohnson/clock"
"github.com/urfave/cli"
)

func init() {
noRotateFlag := cli.BoolFlag{
Name: "no-rotate, r",
Usage: "Do not create and rotate daily databases. Instead, split the incoming flows based on their timestamps into day-by-day databases and make them available to RITA when IPFIX-RITA shuts down.",
}

GetRegistry().RegisterCommands(cli.Command{
Name: "run",
Usage: "Run the IPFIX-RITA converter",
Flags: []cli.Flag{noRotateFlag},
Action: func(c *cli.Context) error {
env, err := environment.NewDefaultEnvironment()
if err != nil {
return cli.NewExitError(fmt.Sprintf("%+v\n", err), 1)
}
err = convert(env)
noRotate := c.Bool("no-rotate")
err = convert(env, noRotate)
if err != nil {
env.Logger.Error(err, nil)
return cli.NewExitError(nil, 1)
Expand All @@ -36,7 +45,7 @@ func init() {
})
}

func convert(env environment.Environment) error {
func convert(env environment.Environment, noRotate bool) error {

//use CTRL-C as our signal to wrap up and exit
ctx, _ := interruptContext(env.Logger)
Expand Down Expand Up @@ -132,22 +141,37 @@ func convert(env environment.Environment) error {
//bulkBatchSize is how much data is shipped to MongoDB at a time
bulkBatchSize := outputBufferSize

dayRotationPeriodMillis := int64(1000 * 60 * 60 * 24) //daily datasets
gracePeriodMillis := int64(1000 * 60 * 5) //analysis can happen after 12:05 am
dateFormatString := "2006-01-02"

//NewStreamingRITATimeIntervalWriter creates a MongoDB/RITA conn-record writer
//which splits output records up based on the time the connection finished
writer, err := dates.NewStreamingRITATimeIntervalWriter(
env.GetOutputConfig().GetRITAConfig(),
env.GetIPFIXConfig(),
bulkBatchSize, flushDeadline,
dayRotationPeriodMillis, gracePeriodMillis,
clock.New(), time.Local, dateFormatString,
env.Logger,
)
if err != nil {
return err
var writer output.SessionWriter

if !noRotate {
dayRotationPeriodMillis := int64(1000 * 60 * 60 * 24) //daily datasets
gracePeriodMillis := int64(1000 * 60 * 5) //analysis can happen after 12:05 am
dateFormatString := "2006-01-02"

//NewStreamingRITATimeIntervalWriter creates a MongoDB/RITA conn-record writer
//which splits output records up based on the time the connection finished
writer, err = streamingRITAOutput.NewStreamingRITATimeIntervalWriter(
env.GetOutputConfig().GetRITAConfig(),
env.GetIPFIXConfig(),
bulkBatchSize, flushDeadline,
dayRotationPeriodMillis, gracePeriodMillis,
clock.New(), time.Local, dateFormatString,
env.Logger,
)
if err != nil {
return err
}
} else {
writer, err = batchRITAOutput.NewBatchRITAConnDateWriter(
env.GetOutputConfig().GetRITAConfig(),
env.GetIPFIXConfig(),
bulkBatchSize, flushDeadline,
env.Logger,
)
if err != nil {
return err
}
env.Info("Database rotation has been disabled", nil)
}

//input channels
Expand Down
178 changes: 178 additions & 0 deletions converter/output/rita/batch/dates/rita_dates.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
package dates

import (
"context"
"net"
"time"

"github.com/activecm/ipfix-rita/converter/config"
"github.com/activecm/ipfix-rita/converter/logging"
"github.com/activecm/ipfix-rita/converter/output"
"github.com/activecm/ipfix-rita/converter/output/rita"
"github.com/activecm/ipfix-rita/converter/output/rita/buffered"
"github.com/activecm/ipfix-rita/converter/stitching/session"
"github.com/activecm/rita/parser/parsetypes"
"github.com/pkg/errors"
)

//batchRITAConnDateWriter writes session aggregates to MongoDB
//as RITA Conn records. Each record is routed
//to a database depending on the FlowEnd time. Additionally, it creates
//a RITA MetaDB record for each database before inserting data
//into the respective database. The data is batched up in buffers
//before being sent to MongoDB. The buffers are flushed when
//they are full or after a deadline passes for the individual buffer.
type batchRITAConnDateWriter struct {
	db                rita.OutputDB                            //connection to the RITA output MongoDB
	localNets         []net.IPNet                              //networks treated as "local" when building conn records
	outputCollections map[string]*buffered.AutoFlushCollection //buffered collections keyed by date suffix ("2006-01-02")
	bufferSize        int64                                    //max records a buffer holds before flushing
	autoFlushTime     time.Duration                            //deadline after which a buffer flushes regardless of size
	autoFlushContext  context.Context                          //cancelled (via autoFlushOnFatal) when an auto-flusher fails
	autoFlushOnFatal  func()                                   //cancel func for autoFlushContext; passed to each auto-flusher
	log               logging.Logger                           //destination for warnings and info messages
}

//NewBatchRITAConnDateWriter constructs a buffered, RITA compatible
//session writer which routes each record into a database selected by
//the record's flow end date. A MetaDB record is registered as each
//output database is created. Each buffer flushes when it fills up or
//when autoFlushTime elapses.
func NewBatchRITAConnDateWriter(ritaConf config.RITA, ipfixConf config.IPFIX,
	bufferSize int64, autoFlushTime time.Duration, log logging.Logger) (output.SessionWriter, error) {
	outDB, err := rita.NewOutputDB(ritaConf)
	if err != nil {
		return nil, errors.Wrap(err, "could not connect to RITA MongoDB")
	}

	//warn (but keep going) for every local network which fails to parse
	nets, netErrs := ipfixConf.GetLocalNetworks()
	for i := range netErrs {
		log.Warn("could not parse local network", logging.Fields{"err": netErrs[i]})
	}

	//cancelling flushCtx lets a failing auto-flusher halt the writer
	flushCtx, cancelFlush := context.WithCancel(context.Background())

	writer := &batchRITAConnDateWriter{
		db:                outDB,
		localNets:         nets,
		outputCollections: make(map[string]*buffered.AutoFlushCollection),
		bufferSize:        bufferSize,
		autoFlushTime:     autoFlushTime,
		autoFlushContext:  flushCtx,
		autoFlushOnFatal:  cancelFlush,
		log:               log,
	}
	return writer, nil
}

//Write consumes session aggregates from the sessions channel, converts
//each one into a RITA Conn record, and inserts it into the buffered
//collection matching the session's flow end date. It runs in a
//background goroutine and returns a channel on which any errors are
//reported. The goroutine exits — closing the error channel and all DB
//sessions — when the input channel closes or when an auto-flusher
//signals a fatal error through r.autoFlushContext.
func (r *batchRITAConnDateWriter) Write(sessions <-chan *session.Aggregate) <-chan error {
	errs := make(chan error)
	go func() {
		defer close(errs)
		defer r.closeDBSessions(errs)

	WriteLoop:
		for {
			select {
			case <-r.autoFlushContext.Done():
				break WriteLoop
			case sess, ok := <-sessions:
				// check if the program is shutting down
				if !ok {
					break WriteLoop
				}
				// ensure there weren't any errors in the autoflusher
				// NOTE: select is nondeterministic, so sess may be selected
				// even though the context has triggered. This means we need
				// to check it again here.
				select {
				case <-r.autoFlushContext.Done():
					break WriteLoop
				default:
				}

				//convert the record to RITA output
				var connRecord parsetypes.Conn
				sess.ToRITAConn(&connRecord, r.isIPLocal)

				//create/ get the buffered output collection
				outColl, err := r.getConnCollectionForSession(sess, errs, r.autoFlushOnFatal)
				if err != nil {
					errs <- err
					break WriteLoop
				}

				//insert the record
				err = outColl.Insert(connRecord)
				if err != nil {
					errs <- err
					break WriteLoop
				}
			}
		}
	}()
	return errs
}

//closeDBSessions flushes and closes every buffered output collection,
//marks each output database's MetaDB record as finished importing, and
//finally tears down the MongoDB connection. MetaDB failures are
//reported on errs.
func (r *batchRITAConnDateWriter) closeDBSessions(errs chan<- error) {
	for _, coll := range r.outputCollections {
		//closing the collection stops its auto-flusher from sending
		//on errs
		coll.Close()

		if err := r.db.MarkImportFinishedInMetaDB(coll.Database()); err != nil {
			errs <- err
		}
	}
	r.db.Close()
}

//isIPLocal reports whether ipAddrStr falls within any of the
//configured local networks.
func (r *batchRITAConnDateWriter) isIPLocal(ipAddrStr string) bool {
	parsed := net.ParseIP(ipAddrStr)
	for _, network := range r.localNets {
		if network.Contains(parsed) {
			return true
		}
	}
	return false
}

//getConnCollectionForSession returns the buffered output collection
//matching the session's flow end date, creating the collection, its
//MetaDB record, and its auto-flusher on first use. Created collections
//are cached by date suffix so later sessions reuse them.
func (r *batchRITAConnDateWriter) getConnCollectionForSession(sess *session.Aggregate,
	autoFlushAsyncErrChan chan<- error, autoFlushOnFatal func()) (*buffered.AutoFlushCollection, error) {

	//derive the database suffix ("YYYY-MM-DD") from the flow end time.
	//time.Unix takes (seconds, nanoseconds), so split the millisecond
	//timestamp accordingly (1e6 nanoseconds per millisecond).
	endMillis := sess.FlowEndMilliseconds()
	flowEnd := time.Unix(endMillis/1000, (endMillis%1000)*int64(time.Millisecond))
	dbSuffix := flowEnd.Format("2006-01-02")

	//fast path: reuse the cached buffered collection for this date
	if cached, ok := r.outputCollections[dbSuffix]; ok {
		return cached, nil
	}

	//open a new output collection for this date
	rawColl, err := r.db.NewRITAOutputConnection(dbSuffix)
	if err != nil {
		return nil, errors.Wrapf(err, "could not connect to output database for suffix: %s", dbSuffix)
	}

	//register the database with the MetaDB before writing any data
	if err := r.db.EnsureMetaDBRecordExists(rawColl.Database.Name); err != nil {
		rawColl.Database.Session.Close()
		return nil, err
	}

	//wrap the collection in an auto-flushing buffer, start the
	//flusher, and cache the result for subsequent sessions
	newBuffer := buffered.NewAutoFlushCollection(rawColl, r.bufferSize, r.autoFlushTime)
	newBuffer.StartAutoFlush(autoFlushAsyncErrChan, autoFlushOnFatal)
	r.outputCollections[dbSuffix] = newBuffer

	return newBuffer, nil
}
48 changes: 48 additions & 0 deletions docs/Developer Notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,51 @@ The `make-release` script will then exit. Now, you must go to the Github page
for the project and make a new release. Set the referenced tag for the release
to the version you entered into the VERSION file. Add a small write up for
the new version and attach the resulting tarball to the release.

### Replaying a PCAP with Netflow Data

While any PCAP can be converted into IPFIX records using YAF (see [Generating Data.md](./Generating%20Data.md)), PCAPs which contain raw NetFlow v5/v9/IPFIX
data may be replayed directly using the [replay-pcap.py script](../dev-scripts/replay-pcap.py).

The script requires **python3** and **scapy**. **scapy** can be installed by running `pip3 install scapy`.

```
./replay-pcap.py --help
This script reads in a [pcap-file], extracts the UDP packets sent to
[old-dest-ip] on [old-dest-port], and sends the data in the packets to
[new-dest-ip] on [new-dest-port].
Usage:
./replay-pcap.py pcap-file old-dest-ip old-dest-port new-dest-ip new-dest-port
```

If the PCAP was not captured on the same day as it was replayed, database rotation
must be disabled in the converter.

### Disabling Database Rotation

The converter automatically rotates the resulting RITA databases on a day by day basis.
In order to carry this out, the converter rejects any flows which are not timestamped
within the current day (Note: there is a small grace period in which flows from the
previous day are not rejected).

This presents a problem when processing data stored in a PCAP file.

Records stored in PCAP files can be converted into IPFIX flows using YAF. In this case, the timestamps
in the PCAP can be aligned to the current date using the [align_pcap_to_today.sh](../dev-scripts/align_pcap_to_today.sh) script before YAF performs the conversion.

However, this alignment can't be performed when replaying raw
NetFlow v5/v9/IPFIX data as recorded in a PCAP.

Restarting IPFIX-RITA with the following commands prevents this problem from
occurring by disabling database rotation.
```
sudo ipfix-rita rm -fs converter
sudo ipfix-rita -f /opt/ipfix-rita/lib/docker-compose/no-rotate.yaml up -d converter
```

IPFIX-RITA will then place each stitched session into a database matching its
closing timestamp. These databases will not be ready for analysis until IPFIX-RITA
is stopped; that is, the `ImportFinished` flag is not set on the created MetaDatabase
records until the program exits.
11 changes: 7 additions & 4 deletions runtime/bin/ipfix-rita
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@ set -e

# Change dir to script dir
pushd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")" > /dev/null
export COMPOSE_FILE="../lib/docker-compose/main.yaml"

export COMPOSE_PROJECT_NAME=IPFIX_RITA

# Use _COMPOSE_FILE to allow the use of "-f"
_COMPOSE_FILE="../lib/docker-compose/main.yaml"

# Let users/ the install script override the version to run
export IPFIX_RITA_VERSION="${IPFIX_RITA_VERSION:-latest}"
DOCKER_IMAGE_IN="images-${IPFIX_RITA_VERSION}.tgz"
Expand All @@ -29,16 +32,16 @@ if [ "$INPUT_WORKERS" -lt 1 ]; then
fi

# Ensure the timezone is set inside the docker containers
# We use the TZ variable rather than bind mount /etc/localtime
# We use the TZ variable rather than bind mount /etc/localtime
# into our containers since /etc/localtime is a symlink.
# If the container's timezone data directory has the same
# If the container's timezone data directory has the same
# layout as the host's then the bind mounted symlink would work.
# However, this cannot be guaranteed.
if [ -z "$TZ" ]; then
export TZ="$(basename $(dirname $(readlink /etc/localtime)))/$(basename $(readlink /etc/localtime))"
fi

docker-compose "$@"
docker-compose -f "$_COMPOSE_FILE" "$@"

# Change back to original directory
popd > /dev/null
5 changes: 5 additions & 0 deletions runtime/lib/docker-compose/no-rotate.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# docker-compose override which starts the converter with database
# rotation disabled ("run --no-rotate"). Apply with:
#   ipfix-rita -f /opt/ipfix-rita/lib/docker-compose/no-rotate.yaml up -d converter
version: '3.3'

services:
  converter:
    command: run --no-rotate

0 comments on commit ee1c0a5

Please sign in to comment.