Files
agnostic_orderbook
ahash
aho_corasick
arrayref
arrayvec
atty
base64
bincode
blake3
block_buffer
block_padding
borsh
borsh_derive
borsh_derive_internal
borsh_schema_derive_internal
bs58
bv
bytemuck
byteorder
cfg_if
constant_time_eq
cpufeatures
crunchy
crypto_mac
curve25519_dalek
derivative
dex_v3
digest
either
enumflags2
enumflags2_derive
env_logger
generic_array
getrandom
hashbrown
hex
hmac
hmac_drbg
humantime
itertools
keccak
lazy_static
libc
libm
libsecp256k1
libsecp256k1_core
log
memchr
memmap2
num_derive
num_enum
num_enum_derive
num_traits
opaque_debug
ppv_lite86
proc_macro2
quote
rand
rand_chacha
rand_core
rand_pcg
regex
regex_syntax
rustversion
serde
serde_bytes
serde_derive
sha2
sha3
solana_frozen_abi
solana_frozen_abi_macro
solana_logger
solana_program
solana_sdk_macro
spin
spl_token
subtle
syn
synstructure
termcolor
thiserror
thiserror_impl
typenum
unicode_xid
zeroize
zeroize_derive
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
/// A heuristic frequency based detection of rare bytes for substring search.
///
/// This detector attempts to pick out two bytes in a needle that are predicted
/// to occur least frequently. The purpose is to use these bytes to implement
/// fast candidate search using vectorized code.
///
/// A set of offsets is only computed for needles of length 2 or greater.
/// Smaller needles should be special cased by the substring search algorithm
/// in use. (e.g., Use memchr for single byte needles.)
///
/// Note that we use `u8` to represent the offsets of the rare bytes in a
/// needle to reduce space usage. This means that rare byte occurring after the
/// first 255 bytes in a needle will never be used.
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct RareNeedleBytes {
    /// The leftmost offset of the rarest byte in the needle, according to
    /// pre-computed frequency analysis. The "leftmost offset" means that
    /// rare1i <= i for all i where needle[i] == needle[rare1i].
    rare1i: u8,
    /// The leftmost offset of the second rarest byte in the needle, according
    /// to pre-computed frequency analysis. The "leftmost offset" means that
    /// rare2i <= i for all i where needle[i] == needle[rare2i].
    ///
    /// The second rarest byte is used as a type of guard for quickly detecting
    /// a mismatch if the first byte matches. This is a hedge against
    /// pathological cases where the pre-computed frequency analysis may be
    /// off. (But of course, does not prevent *all* pathological cases.)
    ///
    /// In general, rare1i != rare2i by construction, although there is no hard
    /// requirement that they be different. However, since the case of a single
    /// byte needle is handled specially by memchr itself, rare2i generally
    /// always should be different from rare1i since it would otherwise be
    /// ineffective as a guard.
    rare2i: u8,
}

impl RareNeedleBytes {
    /// Create a new pair of rare needle bytes with the given offsets. This is
    /// only used in tests for generating input data.
    #[cfg(all(test, feature = "std"))]
    pub(crate) fn new(rare1i: u8, rare2i: u8) -> RareNeedleBytes {
        RareNeedleBytes { rare1i, rare2i }
    }

    /// Detect the leftmost offsets of the two rarest bytes in the given
    /// needle.
    pub(crate) fn forward(needle: &[u8]) -> RareNeedleBytes {
        if needle.len() <= 1 || needle.len() > core::u8::MAX as usize {
            // For needles bigger than u8::MAX, our offsets aren't big enough.
            // (We make our offsets small to reduce stack copying.)
            // If you have a use case for it, please file an issue. In that
            // case, we should probably just adjust the routine below to pick
            // some rare bytes from the first 255 bytes of the needle.
            //
            // Also note that for needles of size 0 or 1, they are special
            // cased in Two-Way.
            //
            // TODO: Benchmar this.
            return RareNeedleBytes { rare1i: 0, rare2i: 0 };
        }

        // Find the rarest two bytes. We make them distinct by construction.
        let (mut rare1, mut rare1i) = (needle[0], 0);
        let (mut rare2, mut rare2i) = (needle[1], 1);
        if rank(rare2) < rank(rare1) {
            core::mem::swap(&mut rare1, &mut rare2);
            core::mem::swap(&mut rare1i, &mut rare2i);
        }
        for (i, &b) in needle.iter().enumerate().skip(2) {
            if rank(b) < rank(rare1) {
                rare2 = rare1;
                rare2i = rare1i;
                rare1 = b;
                rare1i = i as u8;
            } else if b != rare1 && rank(b) < rank(rare2) {
                rare2 = b;
                rare2i = i as u8;
            }
        }
        // While not strictly required, we really don't want these to be
        // equivalent. If they were, it would reduce the effectiveness of
        // candidate searching using these rare bytes by increasing the rate of
        // false positives.
        assert_ne!(rare1i, rare2i);
        RareNeedleBytes { rare1i, rare2i }
    }

    /// Return the rare bytes in the given needle in the forward direction.
    /// The needle given must be the same one given to the RareNeedleBytes
    /// constructor.
    pub(crate) fn as_rare_bytes(&self, needle: &[u8]) -> (u8, u8) {
        (needle[self.rare1i as usize], needle[self.rare2i as usize])
    }

    /// Return the rare offsets such that the first offset is always <= to the
    /// second offset. This is useful when the caller doesn't care whether
    /// rare1 is rarer than rare2, but just wants to ensure that they are
    /// ordered with respect to one another.
    #[cfg(memchr_runtime_simd)]
    pub(crate) fn as_rare_ordered_usize(&self) -> (usize, usize) {
        let (rare1i, rare2i) = self.as_rare_ordered_u8();
        (rare1i as usize, rare2i as usize)
    }

    /// Like as_rare_ordered_usize, but returns the offsets as their native
    /// u8 values.
    #[cfg(memchr_runtime_simd)]
    pub(crate) fn as_rare_ordered_u8(&self) -> (u8, u8) {
        if self.rare1i <= self.rare2i {
            (self.rare1i, self.rare2i)
        } else {
            (self.rare2i, self.rare1i)
        }
    }

    /// Return the rare offsets as usize values in the order in which they were
    /// constructed. rare1, for example, is constructed as the "rarer" byte,
    /// and thus, callers may want to treat it differently from rare2.
    pub(crate) fn as_rare_usize(&self) -> (usize, usize) {
        (self.rare1i as usize, self.rare2i as usize)
    }

    /// Return the byte frequency rank of each byte. The higher the rank, the
    /// more frequency the byte is predicted to be. The needle given must be
    /// the same one given to the RareNeedleBytes constructor.
    pub(crate) fn as_ranks(&self, needle: &[u8]) -> (usize, usize) {
        let (b1, b2) = self.as_rare_bytes(needle);
        (rank(b1), rank(b2))
    }
}

/// Return the heuristical frequency rank of the given byte. A lower rank
/// means the byte is believed to occur less frequently.
fn rank(b: u8) -> usize {
    crate::memmem::byte_frequencies::BYTE_FREQUENCIES[b as usize] as usize
}