Skip to content

Commit 6556c06

Browse files
authored
Merge pull request #4 from ReagentX/develop
0.2.1
2 parents e7ec183 + 9c3e3c5 commit 6556c06

File tree

2 files changed

+25
-21
lines changed

2 files changed

+25
-21
lines changed

src/deserializer/read.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ use crate::{
2222
///
2323
/// assert_eq!(slice, &[0x01, 0x02]);
2424
/// ```
25+
#[inline(always)]
2526
pub fn read_exact_bytes(data: &[u8], n: usize) -> Result<&[u8]> {
2627
let range = data
2728
.get(0..n)
@@ -45,6 +46,7 @@ pub fn read_exact_bytes(data: &[u8], n: usize) -> Result<&[u8]> {
4546
///
4647
/// assert_eq!(*byte, 0xFF);
4748
/// ```
49+
#[inline(always)]
4850
pub fn read_byte_at(data: &[u8], idx: usize) -> Result<&u8> {
4951
data.get(idx)
5052
.ok_or(TypedStreamError::OutOfBounds(idx, data.len()))
@@ -67,6 +69,7 @@ pub fn read_byte_at(data: &[u8], idx: usize) -> Result<&u8> {
6769
/// assert_eq!(consumed.value, 2);
6870
/// assert_eq!(consumed.bytes_consumed, 1);
6971
/// ```
72+
#[inline(always)]
7073
pub fn read_pointer(pointer: &u8) -> Result<Consumed<u64>> {
7174
let result = u64::from(*pointer)
7275
.checked_sub(REFERENCE_TAG)

src/deserializer/typedstream.rs

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
A writeup about the reverse engineering of `typedstream` can be found [here](https://chrissardegna.com/blog/reverse-engineering-apples-typedstream-format/).
55
*/
66

7-
use std::collections::HashSet;
8-
97
use crate::{
108
deserializer::{
119
constants::{EMPTY, END, START},
@@ -40,7 +38,7 @@ pub struct TypedStreamDeserializer<'a> {
4038
/// As we parse the `typedstream`, build a table of seen [`Archived`] data to reference in the future
4139
pub object_table: Vec<Archived<'a>>,
4240
/// We want to copy embedded types the first time they are seen, even if the types were resolved through references
43-
pub(crate) seen_embedded_types: HashSet<usize>,
41+
pub(crate) seen_embedded_types: Vec<usize>,
4442
}
4543

4644
impl<'a> TypedStreamDeserializer<'a> {
@@ -61,7 +59,7 @@ impl<'a> TypedStreamDeserializer<'a> {
6159
position: 0,
6260
type_table: Vec::with_capacity(16),
6361
object_table: Vec::with_capacity(32),
64-
seen_embedded_types: HashSet::with_capacity(8),
62+
seen_embedded_types: Vec::with_capacity(8),
6563
}
6664
}
6765

@@ -153,12 +151,15 @@ impl<'a> TypedStreamDeserializer<'a> {
153151
}
154152

155153
/// Reads the next byte from the stream, advancing the position.
///
/// # Errors
///
/// Propagates the error returned by [`read_byte_at`] for `self.position`; in that case
/// the position is left unchanged because the increment happens only after a successful read.
154+
#[inline(always)]
156155
fn consume_current_byte(&mut self) -> Result<&u8> {
157156
// `?` returns early on failure, before the position is touched
let byte = read_byte_at(self.data, self.position)?;
158157
// Advance past the byte only once the read has succeeded
self.position += 1;
159158
Ok(byte)
160159
}
161160

161+
/// Reads an unsigned integer from the stream, advancing the position.
162+
#[inline(always)]
162163
fn read_unsigned_int(&mut self) -> Result<u64> {
163164
let unsigned_int = read_unsigned_int(&self.data[self.position..])?;
164165
self.position += unsigned_int.bytes_consumed;
@@ -211,11 +212,11 @@ impl<'a> TypedStreamDeserializer<'a> {
211212
}
212213

213214
fn read_class(&mut self) -> Result<Option<usize>> {
214-
// index of the first START we encounter (the bottom-most child)
215+
// Index of the first START we encounter (the bottom-most child)
215216
let mut first_new: Option<usize> = None;
216-
// index of the most recently pushed class (current “child”)
217+
// Index of the most recently pushed class (current “child”)
217218
let mut prev_new: Option<usize> = None;
218-
// parent for the outer-most new class (set by EMPTY or a pointer)
219+
// Parent for the outer-most new class (set by EMPTY or a pointer)
219220
let final_parent: Option<usize>;
220221

221222
loop {
@@ -316,8 +317,8 @@ impl<'a> TypedStreamDeserializer<'a> {
316317
}
317318

318319
/// Reads numeric types (signed, unsigned, float, double) and returns the corresponding `OutputData`
319-
fn read_number(&mut self, ty: &Type<'a>) -> Result<OutputData<'a>> {
320-
match ty {
320+
fn read_number(&mut self, table_index: usize, type_index: usize) -> Result<OutputData<'a>> {
321+
match self.type_table[table_index][type_index] {
321322
Type::SignedInt => {
322323
let signed_int = read_signed_int(&self.data[self.position..])?;
323324
self.position += signed_int.bytes_consumed;
@@ -343,12 +344,13 @@ impl<'a> TypedStreamDeserializer<'a> {
343344
}
344345

345346
fn read_types(&mut self, types_index: usize) -> Result<Option<Vec<OutputData<'a>>>> {
346-
// Clone types to avoid holding an immutable borrow on self during parsing
347-
let types = self.type_table[types_index].clone();
348-
let mut out_v = Vec::with_capacity(types.len());
347+
// Start reading types from the specified index in the type table
348+
let len = self.type_table[types_index].len();
349+
let mut out_v = Vec::with_capacity(len);
349350

350-
for ty in types {
351-
match ty {
351+
for i in 0..len {
352+
// Read the next type from the type table
353+
match self.type_table[types_index][i] {
352354
Type::Utf8String => {
353355
let str_data = read_string(&self.data[self.position..])?;
354356
self.position += str_data.bytes_consumed;
@@ -378,9 +380,9 @@ impl<'a> TypedStreamDeserializer<'a> {
378380
// Read a single byte for unknown data
379381
out_v.push(OutputData::Byte(byte));
380382
}
381-
// numeric types
383+
// Handle all numeric types
382384
Type::SignedInt | Type::UnsignedInt | Type::Float | Type::Double => {
383-
let val = self.read_number(&ty)?;
385+
let val = self.read_number(types_index, i)?;
384386
out_v.push(val);
385387
}
386388
}
@@ -400,20 +402,19 @@ impl<'a> TypedStreamDeserializer<'a> {
400402
// Get the type of the object
401403
let new_types = Type::read_new_type(&self.data[self.position..])?;
402404
let new_type_index = self.type_table.len();
403-
// Embedded data is stored as a String in the objects table
405+
// Embedded data is stored as a Type in the objects table
404406
if is_embedded_type {
405407
self.object_table.push(Archived::Type(new_type_index));
406408
// We only want to include the first embedded reference tag, not subsequent references to the same embed
407409
self.seen_embedded_types
408-
.insert(self.object_table.len().saturating_sub(1));
410+
.push(self.object_table.len().saturating_sub(1));
409411
}
410412

411413
self.type_table.push(new_types.value);
412414
self.position += new_types.bytes_consumed;
413415
Ok(Some(self.type_table.len() - 1))
414416
}
415-
EMPTY => Ok(None),
416-
END => Ok(None),
417+
END | EMPTY => Ok(None),
417418
ptr => {
418419
let pointer = read_pointer(&ptr)?;
419420
let ref_tag = pointer.value as usize;
@@ -428,7 +429,7 @@ impl<'a> TypedStreamDeserializer<'a> {
428429
&& self.type_table.get(ref_tag as usize).is_some()
429430
{
430431
self.object_table.push(Archived::Type(ref_tag));
431-
self.seen_embedded_types.insert(ref_tag);
432+
self.seen_embedded_types.push(ref_tag);
432433
}
433434
}
434435

0 commit comments

Comments
 (0)