Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
282 changes: 20 additions & 262 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions cirup_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@ turso-rust = ["dep:tokio", "dep:turso", "dep:libsql"]
rusqlite-c = ["dep:rusqlite"]

[dependencies]
regex = "1.0"
serde = { version = "1.0", features = ["derive"] }
xml-rs = "0.8.0"
dot_json = "0.2.0"
lazy_static = "1.0.0"
prettytable-rs = "^0.10"
log = "0.4"
sha2 = "0.10"
unicode-width = "0.2"

[dependencies.uuid]
version = "0.6"
Expand Down
51 changes: 44 additions & 7 deletions cirup_core/src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ use std::path::Path;

use std::collections::HashMap;
use std::sync::Mutex;
#[cfg(test)]
use std::time::Instant;

use sha2::{Digest, Sha256};

Expand Down Expand Up @@ -50,8 +52,14 @@ pub(crate) fn load_string_from_file(filename: &str) -> Result<String, Box<dyn Er
if let Some(text) = vfile_get(filename) {
return Ok(text);
}

let mut file = fs::File::open(filename)?;
let mut text = String::new();
let capacity = file
.metadata()
.ok()
.and_then(|metadata| usize::try_from(metadata.len()).ok())
.unwrap_or(0);
let mut text = String::with_capacity(capacity);
file.read_to_string(&mut text)?;
Ok(text)
}
Expand All @@ -74,13 +82,14 @@ fn should_write_output(output_hash: [u8; 32], existing_bytes: Option<&[u8]>, tou
output_hash != sha256_hash(existing_bytes)
}

fn encode_utf8(text: &str, output_encoding: OutputEncoding) -> Vec<u8> {
fn encode_utf8_owned(text: String, output_encoding: OutputEncoding) -> Vec<u8> {
match output_encoding {
OutputEncoding::Utf8NoBom => text.as_bytes().to_vec(),
OutputEncoding::Utf8NoBom => text.into_bytes(),
OutputEncoding::Utf8Bom => {
let text = text.into_bytes();
let mut output = Vec::with_capacity(UTF8_BOM.len() + text.len());
output.extend_from_slice(&UTF8_BOM);
output.extend_from_slice(text.as_bytes());
output.extend_from_slice(&text);
output
}
}
Expand All @@ -95,17 +104,17 @@ fn output_bytes_for_format(
FormatType::Json => {
let file_format = JsonFileFormat {};
let text = file_format.write_to_str(resources);
encode_utf8(&text, output_encoding)
encode_utf8_owned(text, output_encoding)
}
FormatType::Resx => {
let file_format = ResxFileFormat {};
let text = file_format.write_to_str(resources);
encode_utf8(&text, output_encoding)
encode_utf8_owned(text, output_encoding)
}
FormatType::Restext => {
let file_format = RestextFileFormat {};
let text = file_format.write_to_str(resources);
encode_utf8(&text, output_encoding)
encode_utf8_owned(text, output_encoding)
}
FormatType::Unknown => Vec::new(),
}
Expand Down Expand Up @@ -355,3 +364,31 @@ fn would_save_resource_file_reports_true_for_missing_output() {

assert!(would_write);
}

#[test]
#[ignore = "benchmark: run manually with --ignored --nocapture"]
#[allow(clippy::print_stdout)]
// Manual benchmark: measures how long output_bytes_for_format takes to
// serialize a large resource list as JSON, with and without a UTF-8 BOM.
// Run with `cargo test -- --ignored --nocapture` to see the timings.
fn benchmark_output_bytes_for_format_large_input() {
    // 50k synthetic resources; `index % 13` spreads keys across 13 dotted
    // groups so the output is not one flat namespace.
    let resources = (0..50_000usize)
        .map(|index| Resource::new(&format!("group{index}.key{}", index % 13), &format!("value{index}")))
        .collect::<Vec<_>>();

    let started = Instant::now();
    let utf8_no_bom = output_bytes_for_format(FormatType::Json, &resources, OutputEncoding::Utf8NoBom);
    let utf8_no_bom_elapsed = started.elapsed();

    let started = Instant::now();
    let utf8_bom = output_bytes_for_format(FormatType::Json, &resources, OutputEncoding::Utf8Bom);
    let utf8_bom_elapsed = started.elapsed();

    // Sanity check: the BOM variant must be strictly larger because it
    // carries the BOM prefix in front of otherwise identical bytes.
    assert!(utf8_no_bom.len() < utf8_bom.len());

    println!(
        "output-bytes benchmark: resources={} utf8_no_bom_bytes={} utf8_bom_bytes={} no_bom={:?} bom={:?}",
        resources.len(),
        utf8_no_bom.len(),
        utf8_bom.len(),
        utf8_no_bom_elapsed,
        utf8_bom_elapsed
    );
}
92 changes: 73 additions & 19 deletions cirup_core/src/json.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
extern crate dot_json;
extern crate serde;
extern crate serde_json;

use dot_json::value_to_dot;
use serde::Serialize;
use serde_json::{Map, Value};
#[cfg(test)]
use std::time::Instant;

use crate::Resource;
use crate::file::FileFormat;
Expand All @@ -14,23 +14,37 @@ use std::error::Error;
pub(crate) struct JsonFileFormat {}

fn json_dot_insert(root_map: &mut Map<String, Value>, name: &str, value: &str) {
if let Some(dot_index) = name.find('.') {
let root_path = &name[0..dot_index];
let child_path = &name[dot_index + 1..name.len()];
if let Some((root_path, child_path)) = name.split_once('.') {
let child_value = root_map
.entry(root_path.to_owned())
.or_insert_with(|| Value::Object(Map::new()));

if !root_map.contains_key(root_path) {
let child_map: Map<String, Value> = Map::new();
root_map.insert(root_path.to_owned(), Value::Object(child_map));
}

if let Some(Value::Object(child_map)) = root_map.get_mut(root_path) {
if let Value::Object(child_map) = child_value {
json_dot_insert(child_map, child_path, value);
}
} else {
root_map.insert(name.to_owned(), Value::String(value.to_owned()));
}
}

/// Depth-first walk of a JSON value that emits one `Resource` per string
/// leaf, keyed by the dotted path accumulated in `path` (e.g. "a.b.c").
/// Non-string, non-object values are silently skipped.
fn flatten_json_value(value: &Value, path: &mut String, resources: &mut Vec<Resource>) {
    if let Value::Object(object) = value {
        for (key, child_value) in object {
            // Remember where this segment starts so the path can be rolled
            // back after the recursive call, reusing one String allocation.
            let saved_len = path.len();
            if saved_len > 0 {
                path.push('.');
            }
            path.push_str(key);
            flatten_json_value(child_value, path, resources);
            path.truncate(saved_len);
        }
    } else if let Value::String(text) = value {
        resources.push(Resource::new(path, text));
    }
}

fn json_to_string_pretty(value: &Map<String, Value>) -> String {
let writer = Vec::new();
let formatter = serde_json::ser::PrettyFormatter::with_indent(b" ");
Expand All @@ -47,17 +61,18 @@ impl FileFormat for JsonFileFormat {
fn parse_from_str(&self, text: &str) -> Result<Vec<Resource>, Box<dyn Error>> {
let mut resources: Vec<Resource> = Vec::new();
let root_value: Value = serde_json::from_str(text)?;
let root_value_dot = value_to_dot(&root_value);
let root_object_dot = match root_value_dot.as_object() {
let root_object = match root_value.as_object() {
Some(object) => object,
None => Err("json dot value is not an object")?,
None => Err("json value is not an object")?,
};
for (key, value) in root_object_dot.iter() {
if let Some(value) = value.as_str() {
let resource = Resource::new(key.as_str(), value);
resources.push(resource);
}

let mut path = String::new();
for (key, value) in root_object {
path.clear();
path.push_str(key);
flatten_json_value(value, &mut path, &mut resources);
}

Ok(resources)
}

Expand Down Expand Up @@ -161,3 +176,42 @@ fn test_json_write() {
//println!("{}", expected_text);
assert_eq!(actual_text, expected_text);
}

#[test]
#[ignore = "benchmark: run manually with --ignored --nocapture"]
#[allow(clippy::print_stdout)]
// Manual round-trip benchmark for the JSON format: serialize a large
// resource list, then re-parse the output and time both directions.
// Run with `cargo test -- --ignored --nocapture` to see the timings.
fn benchmark_json_parse_and_write_large_input() {
    let file_format = JsonFileFormat {};
    let repetitions = 5_000usize;
    let mut resources = Vec::with_capacity(repetitions * 6);

    // Six entries per group: a mix of shallow and deeply nested dotted
    // keys so the dot-expansion paths are all exercised.
    for index in 0..repetitions {
        let prefix = format!("group{index}");
        for (suffix, text) in [
            ("lblBoat", "I'm on a boat."),
            ("lblYolo", "You only live once"),
            ("lblDogs", "Who let the dogs out?"),
            ("language.en", "English"),
            ("language.fr", "French"),
            ("very.deep.object", "value"),
        ] {
            resources.push(Resource::new(&format!("{prefix}.{suffix}"), text));
        }
    }

    let write_start = Instant::now();
    let written = file_format.write_to_str(&resources);
    let write_elapsed = write_start.elapsed();

    let parse_start = Instant::now();
    let reparsed = file_format
        .parse_from_str(&written)
        .unwrap_or_else(|e| panic!("json benchmark parse failed: {}", e));
    let parse_elapsed = parse_start.elapsed();

    // The round trip must be lossless in count: every resource written
    // out should come back after re-parsing.
    assert_eq!(reparsed.len(), resources.len());

    println!(
        "json benchmark: resources={} bytes={} write={:?} parse={:?}",
        resources.len(),
        written.len(),
        write_elapsed,
        parse_elapsed
    );
}
4 changes: 0 additions & 4 deletions cirup_core/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
extern crate regex;
extern crate treexml;
extern crate uuid;

Expand All @@ -10,9 +9,6 @@ extern crate serde_json;
#[macro_use]
extern crate log;

#[macro_use]
extern crate prettytable;

#[macro_use]
extern crate lazy_static;

Expand Down
Loading
Loading