Skip to content

Commit

Permalink
datasets: move initial file reading to rust
Browse files Browse the repository at this point in the history
In a recent warning reported by scan-build, datasets were found to be
using a blocking call in a critical section.

datasets.c:187:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection]
  187 |     while (fgets(line, (int)sizeof(line), fp) != NULL) {
      |            ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
datasets.c:292:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection]
  292 |     while (fgets(line, (int)sizeof(line), fp) != NULL) {
      |            ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
datasets.c:368:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection]
  368 |     while (fgets(line, (int)sizeof(line), fp) != NULL) {
      |            ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
datasets.c:442:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection]
  442 |     while (fgets(line, (int)sizeof(line), fp) != NULL) {
      |            ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
datasets.c:512:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection]
  512 |     while (fgets(line, (int)sizeof(line), fp) != NULL) {
      |            ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 warnings generated.

These calls are blocking in the multi tenant mode where several tenants
may be trying to load the same dataset in parallel.
In a single tenant mode, this operation is performed as a part of a
single thread before the engine startup.

In order to evade the warning and simplify the code, the initial file
reading is moved to Rust with this commit with a much simpler handling
of dataset and datarep.

Bug 7398
  • Loading branch information
inashivb committed Jan 9, 2025
1 parent 053e309 commit 5865684
Show file tree
Hide file tree
Showing 6 changed files with 162 additions and 71 deletions.
1 change: 1 addition & 0 deletions rust/cbindgen.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ include = [
"FtpEvent",
"SCSigTableElmt",
"SCTransformTableElmt",
"DataRepType",
]

# A list of items to not include in the generated bindings
Expand Down
151 changes: 151 additions & 0 deletions rust/src/detect/datasets.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
/* Copyright (C) 2025 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
* Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/

// Author: Shivani Bhardwaj <[email protected]>

//! This module exposes items from the datasets C code to Rust.
use crate::common::CallbackFatalErrorOnInit;
use base64::{self, Engine};
use std::ffi::{c_char, CStr, CString};
use std::fs::{File, OpenOptions};
use std::io::{self, BufRead};
use std::path::Path;

/// Opaque Dataset type defined in C
#[derive(Copy, Clone)]
pub enum Dataset {}

// Simple C type converted to Rust
#[derive(Debug, PartialEq)]
#[repr(C)]
pub struct DataRepType {
pub value: u16,
}

// Extern fns operating on the opaque Dataset type above
/// cbindgen:ignore
extern "C" {
pub fn DatasetAdd(set: &Dataset, data: *const u8, len: u32) -> i32;
pub fn DatasetAddwRep(set: &Dataset, data: *const u8, len: u32, rep: *const DataRepType)
-> i32;
}

#[no_mangle]
pub unsafe extern "C" fn ProcessStringDatasets(
set: &Dataset, name: *const c_char, fname: *const c_char, fmode: *const c_char,
) -> i32 {
let file_string = unwrap_or_return!(CStr::from_ptr(fname).to_str(), -2);
let mode = unwrap_or_return!(CStr::from_ptr(fmode).to_str(), -2);
let set_name = unwrap_or_return!(CStr::from_ptr(name).to_str(), -2);
let filename = Path::new(file_string);
let mut is_dataset = false;
let mut is_datarep = false;
if let Ok(lines) = read_lines(filename, mode) {
for line in lines.map_while(Result::ok) {
let v: Vec<&str> = line.split(',').collect();
// Ignore empty and invalid lines in dataset/rep file
if v.is_empty() || v.len() > 2 {
continue;
}
if v.len() == 1 {
if is_datarep {
SCLogError!(
"Cannot mix dataset and datarep values for set {} in {}",
set_name,
filename.display()
);
return -2;
}
is_dataset = true;
// Dataset
let mut decoded: Vec<u8> = vec![];
if base64::engine::general_purpose::STANDARD
.decode_vec(v[0], &mut decoded)
.is_err()
{
let msg = CString::new(format!(
"bad base64 encoding {} in {}",
set_name,
filename.display()
))
.unwrap();
CallbackFatalErrorOnInit(msg.as_ptr());
continue;
}
DatasetAdd(set, decoded.as_ptr(), decoded.len() as u32);
} else {
if is_dataset {
SCLogError!(
"Cannot mix dataset and datarep values for set {} in {}",
set_name,
filename.display()
);
return -2;
}
// Datarep
is_datarep = true;
let mut decoded: Vec<u8> = vec![];
if base64::engine::general_purpose::STANDARD
.decode_vec(v[0], &mut decoded)
.is_err()
{
let msg = CString::new(format!(
"bad base64 encoding {} in {}",
set_name,
filename.display()
))
.unwrap();
CallbackFatalErrorOnInit(msg.as_ptr());
continue;
}
if let Ok(val) = v[1].to_string().parse::<u16>() {
let rep: DataRepType = DataRepType { value: val };
DatasetAddwRep(set, decoded.as_ptr(), decoded.len() as u32, &rep);
} else {
let msg = CString::new(format!(
"invalid datarep value {} in {}",
set_name,
filename.display()
))
.unwrap();
CallbackFatalErrorOnInit(msg.as_ptr());
continue;
}
}
}
} else {
return -1;
}
0
}

fn read_lines<P>(filename: P, fmode: &str) -> io::Result<io::Lines<io::BufReader<File>>>
where
P: AsRef<Path>,
{
let file: File = if fmode == "r" {
File::open(filename)?
} else {
OpenOptions::new()
.append(true)
.create(true)
.read(true)
.open(filename)?
};
Ok(io::BufReader::new(file).lines())
}
1 change: 1 addition & 0 deletions rust/src/detect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ pub mod transforms;
pub mod uint;
pub mod uri;
pub mod tojson;
pub mod datasets;

use crate::core::AppProto;
use std::os::raw::{c_int, c_void};
Expand Down
4 changes: 1 addition & 3 deletions src/datasets-reputation.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@
#ifndef SURICATA_DATASETS_REPUTATION_H
#define SURICATA_DATASETS_REPUTATION_H

typedef struct DataRepType {
uint16_t value;
} DataRepType;
#include "rust-bindings.h"

typedef struct DataRepResultType {
bool found;
Expand Down
75 changes: 7 additions & 68 deletions src/datasets.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@ SCMutex sets_lock = SCMUTEX_INITIALIZER;
static Dataset *sets = NULL;
static uint32_t set_ids = 0;

static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
DataRepType *rep);
int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep);

static inline void DatasetUnlockData(THashData *d)
{
Expand Down Expand Up @@ -496,80 +495,21 @@ static int DatasetLoadString(Dataset *set)
return 0;

SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

const char *fopen_mode = "r";
if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
fopen_mode = "a+";
}

FILE *fp = fopen(set->load, fopen_mode);
if (fp == NULL) {
SCLogError("fopen '%s' failed: %s", set->load, strerror(errno));
int retval = ProcessStringDatasets(set, set->name, set->load, fopen_mode);
if (retval == -2) {
FatalErrorOnInit("dataset %s could not be processed", set->name);
} else if (retval == -1) {
return -1;
}

uint32_t cnt = 0;
char line[1024];
while (fgets(line, (int)sizeof(line), fp) != NULL) {
if (strlen(line) <= 1)
continue;

char *r = strchr(line, ',');
if (r == NULL) {
line[strlen(line) - 1] = '\0';
SCLogDebug("line: '%s'", line);
uint32_t decoded_size = Base64DecodeBufferSize(strlen(line));
// coverity[alloc_strlen : FALSE]
uint8_t decoded[decoded_size];
uint32_t num_decoded =
Base64Decode((const uint8_t *)line, strlen(line), Base64ModeStrict, decoded);
if (num_decoded == 0 && strlen(line) > 0) {
FatalErrorOnInit("bad base64 encoding %s/%s", set->name, set->load);
continue;
}

if (DatasetAdd(set, (const uint8_t *)decoded, num_decoded) < 0) {
FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
continue;
}
cnt++;
} else {
line[strlen(line) - 1] = '\0';
SCLogDebug("line: '%s'", line);

*r = '\0';

uint32_t decoded_size = Base64DecodeBufferSize(strlen(line));
uint8_t decoded[decoded_size];
uint32_t num_decoded =
Base64Decode((const uint8_t *)line, strlen(line), Base64ModeStrict, decoded);
if (num_decoded == 0) {
FatalErrorOnInit("bad base64 encoding %s/%s", set->name, set->load);
continue;
}

r++;
SCLogDebug("r '%s'", r);

DataRepType rep = { .value = 0 };
if (ParseRepLine(r, strlen(r), &rep) < 0) {
FatalErrorOnInit("die: bad rep");
continue;
}
SCLogDebug("rep %u", rep.value);

if (DatasetAddwRep(set, (const uint8_t *)decoded, num_decoded, &rep) < 0) {
FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
continue;
}
cnt++;

SCLogDebug("line with rep %s, %s", line, r);
}
}
THashConsolidateMemcap(set->hash);

fclose(fp);
SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
return 0;
}

Expand Down Expand Up @@ -1572,8 +1512,7 @@ int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
return -1;
}

static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
DataRepType *rep)
int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep)
{
if (set == NULL)
return -1;
Expand Down
1 change: 1 addition & 0 deletions src/datasets.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#define SURICATA_DATASETS_H

#include "util-thash.h"
#include "rust.h"
#include "datasets-reputation.h"

int DatasetsInit(void);
Expand Down

0 comments on commit 5865684

Please sign in to comment.