-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
datasets: move initial file reading to rust
In a recent warning reported by scan-build, datasets were found to be using a blocking call in a critical section. datasets.c:187:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection] 187 | while (fgets(line, (int)sizeof(line), fp) != NULL) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ datasets.c:292:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection] 292 | while (fgets(line, (int)sizeof(line), fp) != NULL) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ datasets.c:368:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection] 368 | while (fgets(line, (int)sizeof(line), fp) != NULL) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ datasets.c:442:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection] 442 | while (fgets(line, (int)sizeof(line), fp) != NULL) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ datasets.c:512:12: warning: Call to blocking function 'fgets' inside of critical section [unix.BlockInCriticalSection] 512 | while (fgets(line, (int)sizeof(line), fp) != NULL) { | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 warnings generated. These calls are blocking in the multi tenant mode where several tenants may be trying to load the same dataset in parallel. In a single tenant mode, this operation is performed as a part of a single thread before the engine startup. In order to evade the warning and simplify the code, the initial file reading is moved to Rust with this commit with a much simpler handling of dataset and datarep. Bug 7398
- Loading branch information
Showing
6 changed files
with
162 additions
and
71 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
/* Copyright (C) 2025 Open Information Security Foundation | ||
* | ||
* You can copy, redistribute or modify this Program under the terms of | ||
* the GNU General Public License version 2 as published by the Free | ||
* Software Foundation. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU General Public License | ||
* version 2 along with this program; if not, write to the Free Software | ||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | ||
* 02110-1301, USA. | ||
*/ | ||
|
||
// Author: Shivani Bhardwaj <[email protected]> | ||
|
||
//! This module exposes items from the datasets C code to Rust. | ||
use crate::common::CallbackFatalErrorOnInit; | ||
use base64::{self, Engine}; | ||
use std::ffi::{c_char, CStr, CString}; | ||
use std::fs::{File, OpenOptions}; | ||
use std::io::{self, BufRead}; | ||
use std::path::Path; | ||
|
||
/// Opaque stand-in for the `Dataset` type defined in C. | ||
/// | ||
/// The enum has no variants, so a value of it can never be built from Rust; | ||
/// in this file it only ever appears behind a reference that is received | ||
/// from, or handed back to, C code. | ||
// NOTE(review): the Rustonomicon FFI chapter advises against using an empty | ||
// enum as an opaque FFI type, because references to an uninhabited type are | ||
// a hazard. A zero-sized struct is the recommended shape -- TODO confirm the | ||
// effect on the cbindgen-generated header before changing it. | ||
#[derive(Copy, Clone)] | ||
pub enum Dataset {} | ||
|
||
// Reputation value attached to a datarep entry. | ||
// | ||
// Declared `#[repr(C)]` and passed by pointer to `DatasetAddwRep`, so the | ||
// layout is presumably expected to match the C `DataRepType` definition -- | ||
// TODO confirm against the C header. | ||
#[derive(Debug, PartialEq)] | ||
#[repr(C)] | ||
pub struct DataRepType { | ||
    // Reputation score; parsed from the second comma-separated field of a | ||
    // datarep line as a `u16`. | ||
pub value: u16, | ||
} | ||
|
||
// Foreign functions that operate on the opaque `Dataset` type above. | ||
// The meaning of the `i32` return values is not visible from this file; | ||
// the callers in this file ignore them. | ||
/// cbindgen:ignore | ||
extern "C" { | ||
    // Adds the `len` bytes starting at `data` to `set`. | ||
pub fn DatasetAdd(set: &Dataset, data: *const u8, len: u32) -> i32; | ||
    // Adds the `len` bytes starting at `data` to `set` together with the | ||
    // reputation value pointed to by `rep`. | ||
pub fn DatasetAddwRep(set: &Dataset, data: *const u8, len: u32, rep: *const DataRepType) | ||
-> i32; | ||
} | ||
|
||
#[no_mangle] | ||
pub unsafe extern "C" fn ProcessStringDatasets( | ||
set: &Dataset, name: *const c_char, fname: *const c_char, fmode: *const c_char, | ||
) -> i32 { | ||
let file_string = unwrap_or_return!(CStr::from_ptr(fname).to_str(), -2); | ||
let mode = unwrap_or_return!(CStr::from_ptr(fmode).to_str(), -2); | ||
let set_name = unwrap_or_return!(CStr::from_ptr(name).to_str(), -2); | ||
let filename = Path::new(file_string); | ||
let mut is_dataset = false; | ||
let mut is_datarep = false; | ||
if let Ok(lines) = read_lines(filename, mode) { | ||
for line in lines.map_while(Result::ok) { | ||
let v: Vec<&str> = line.split(',').collect(); | ||
// Ignore empty and invalid lines in dataset/rep file | ||
if v.is_empty() || v.len() > 2 { | ||
continue; | ||
} | ||
if v.len() == 1 { | ||
if is_datarep { | ||
SCLogError!( | ||
"Cannot mix dataset and datarep values for set {} in {}", | ||
set_name, | ||
filename.display() | ||
); | ||
return -2; | ||
} | ||
is_dataset = true; | ||
// Dataset | ||
let mut decoded: Vec<u8> = vec![]; | ||
if base64::engine::general_purpose::STANDARD | ||
.decode_vec(v[0], &mut decoded) | ||
.is_err() | ||
{ | ||
let msg = CString::new(format!( | ||
"bad base64 encoding {} in {}", | ||
set_name, | ||
filename.display() | ||
)) | ||
.unwrap(); | ||
CallbackFatalErrorOnInit(msg.as_ptr()); | ||
continue; | ||
} | ||
DatasetAdd(set, decoded.as_ptr(), decoded.len() as u32); | ||
} else { | ||
if is_dataset { | ||
SCLogError!( | ||
"Cannot mix dataset and datarep values for set {} in {}", | ||
set_name, | ||
filename.display() | ||
); | ||
return -2; | ||
} | ||
// Datarep | ||
is_datarep = true; | ||
let mut decoded: Vec<u8> = vec![]; | ||
if base64::engine::general_purpose::STANDARD | ||
.decode_vec(v[0], &mut decoded) | ||
.is_err() | ||
{ | ||
let msg = CString::new(format!( | ||
"bad base64 encoding {} in {}", | ||
set_name, | ||
filename.display() | ||
)) | ||
.unwrap(); | ||
CallbackFatalErrorOnInit(msg.as_ptr()); | ||
continue; | ||
} | ||
if let Ok(val) = v[1].to_string().parse::<u16>() { | ||
let rep: DataRepType = DataRepType { value: val }; | ||
DatasetAddwRep(set, decoded.as_ptr(), decoded.len() as u32, &rep); | ||
} else { | ||
let msg = CString::new(format!( | ||
"invalid datarep value {} in {}", | ||
set_name, | ||
filename.display() | ||
)) | ||
.unwrap(); | ||
CallbackFatalErrorOnInit(msg.as_ptr()); | ||
continue; | ||
} | ||
} | ||
} | ||
} else { | ||
return -1; | ||
} | ||
0 | ||
} | ||
|
||
/// Open `filename` and return an iterator over its lines.
///
/// When `fmode` is exactly `"r"` the file is opened read-only and must already
/// exist. Any other mode opens the file for reading and appending, creating
/// it if it is missing. Errors from opening the file are returned to the
/// caller.
fn read_lines<P>(filename: P, fmode: &str) -> io::Result<io::Lines<io::BufReader<File>>>
where
    P: AsRef<Path>,
{
    let handle = match fmode {
        "r" => File::open(filename),
        _ => {
            let mut options = OpenOptions::new();
            options.append(true).create(true).read(true);
            options.open(filename)
        }
    }?;
    let reader = io::BufReader::new(handle);
    Ok(reader.lines())
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters