binaryninja/
string_detection.rs

1//! Raw string detection using the same logic as the core strings analysis.
2
3use binaryninjacore_sys::*;
4
5use crate::binary_view::StringReference;
6use crate::rc::Array;
7use crate::settings::Settings;
8use crate::string::IntoCStr;
9
/// Parameters controlling raw string detection, as used by the core strings analysis.
///
/// Values can be written out by hand, taken from [`Default`], or read from the analysis
/// settings with [`StringDetectionParameters::from_settings`]. Two parameter sets compare
/// equal when every field matches, including the order of `unicode_block_names`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct StringDetectionParameters {
    /// Minimum string length; mirrors the `analysis.limits.minStringLength` setting.
    pub min_string_length: usize,
    /// Mirrors the `analysis.unicode.utf8` setting.
    pub utf8_enabled: bool,
    /// Mirrors the `analysis.unicode.utf16` setting.
    pub utf16_enabled: bool,
    /// Mirrors the `analysis.unicode.utf32` setting.
    pub utf32_enabled: bool,
    /// Unicode block names as accepted by the `analysis.unicode.blocks` setting.
    pub unicode_block_names: Vec<String>,
}
20
21impl StringDetectionParameters {
22    /// Builds parameters from the standard string-analysis settings:
23    /// `analysis.limits.minStringLength` and `analysis.unicode.{blocks,utf8,utf16,utf32}`.
24    pub fn from_settings(settings: &Settings) -> Self {
25        Self {
26            min_string_length: settings.get_integer("analysis.limits.minStringLength") as usize,
27            utf8_enabled: settings.get_bool("analysis.unicode.utf8"),
28            utf16_enabled: settings.get_bool("analysis.unicode.utf16"),
29            utf32_enabled: settings.get_bool("analysis.unicode.utf32"),
30            unicode_block_names: settings
31                .get_string_list("analysis.unicode.blocks")
32                .iter()
33                .map(|name| name.to_string())
34                .collect(),
35        }
36    }
37}
38
39impl Default for StringDetectionParameters {
40    fn default() -> Self {
41        Self {
42            min_string_length: 4,
43            utf8_enabled: true,
44            utf16_enabled: true,
45            utf32_enabled: true,
46            unicode_block_names: Vec::new(),
47        }
48    }
49}
50
51/// A compiled string detector using the same detection logic as the core strings analysis.
52///
53/// The detector is immutable once constructed, so a single instance may be shared across threads.
54pub struct StringDetector {
55    handle: *mut BNStringDetector,
56}
57
58impl StringDetector {
59    pub fn new(params: &StringDetectionParameters) -> Self {
60        let block_names: Vec<_> = params
61            .unicode_block_names
62            .iter()
63            .map(|name| name.as_str().to_cstr())
64            .collect();
65        let block_name_ptrs: Vec<*const _> = block_names.iter().map(|name| name.as_ptr()).collect();
66        let raw_params = BNStringDetectionParameters {
67            minStringLength: params.min_string_length,
68            utf8Enabled: params.utf8_enabled,
69            utf16Enabled: params.utf16_enabled,
70            utf32Enabled: params.utf32_enabled,
71            unicodeBlockNames: block_name_ptrs.as_ptr(),
72            unicodeBlockNameCount: block_name_ptrs.len(),
73        };
74        let handle = unsafe { BNCreateStringDetector(&raw_params) };
75        Self { handle }
76    }
77
78    /// Detects strings in a raw data buffer.
79    ///
80    /// Strings must start within the first `block_len` bytes of `data` but may extend to the end
81    /// of `data`, allowing large buffers to be scanned in chunks with a `BN_MAX_STRING_LENGTH`
82    /// overlap tail. `last_found` (optional, in/out, zero-initialized before the first call)
83    /// carries overlap state across consecutive chunk calls so strings spanning a chunk boundary
84    /// are not reported twice. Result addresses are relative to `base_address`.
85    pub fn detect_strings(
86        &self,
87        data: &[u8],
88        block_len: usize,
89        base_address: u64,
90        last_found: Option<&mut StringReference>,
91    ) -> Array<StringReference> {
92        let mut count = 0;
93        match last_found {
94            Some(last) => {
95                let mut raw_last: BNStringReference = (*last).into();
96                let strings = unsafe {
97                    BNStringDetectorDetectStrings(
98                        self.handle,
99                        data.as_ptr(),
100                        data.len(),
101                        block_len,
102                        base_address,
103                        &mut raw_last,
104                        &mut count,
105                    )
106                };
107                *last = raw_last.into();
108                unsafe { Array::new(strings, count, ()) }
109            }
110            None => {
111                let strings = unsafe {
112                    BNStringDetectorDetectStrings(
113                        self.handle,
114                        data.as_ptr(),
115                        data.len(),
116                        block_len,
117                        base_address,
118                        std::ptr::null_mut(),
119                        &mut count,
120                    )
121                };
122                unsafe { Array::new(strings, count, ()) }
123            }
124        }
125    }
126}
127
128impl Drop for StringDetector {
129    fn drop(&mut self) {
130        unsafe { BNFreeStringDetector(self.handle) };
131    }
132}
133
134// SAFETY: A StringDetector is immutable once created. The core performs no internal mutation
135// during BNStringDetectorDetectStrings.
136unsafe impl Send for StringDetector {}
137unsafe impl Sync for StringDetector {}