Initial commit
This commit is contained in:
commit
26c5433d16
21 changed files with 1968 additions and 0 deletions
193
src/cli.rs
Normal file
193
src/cli.rs
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
//! Command line interface.
|
||||
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
|
||||
use {
|
||||
Ruleset,
|
||||
TransformError,
|
||||
TransformedRecord,
|
||||
};
|
||||
|
||||
use csv;
|
||||
|
||||
/// Configuration options for the `Cli`.
///
/// The only setting at present is `csv_options`, which describes how the input CSV file is
/// parsed. `Options` implements `Default`, so struct-update syntax
/// (`.. Default::default()`) can be used to override individual settings.
pub struct Options
{
    /// How to parse the input CSV file. See `CsvOptions`.
    pub csv_options: CsvOptions,
}
|
||||
|
||||
impl Default for Options {
|
||||
fn default() -> Options {
|
||||
Options {
|
||||
csv_options: Default::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE(review): the doctest below compares two `CsvOptions` with `assert_eq!`, which needs
// `PartialEq` and `Debug`; this struct derives neither, so the example presumably does not
// compile as written -- confirm and either add the impls or compare field by field.
/// `Cli` configuration options specific to how to parse the CSV file.
///
/// `CsvOptions` implements `Default` with the following defaults:
///
/// ```
/// extern crate csv;
/// use csv_sanity::cli::CsvOptions;
/// use csv::RecordTerminator;
///
/// let defaults = CsvOptions {
///     delimiter: b',',
///     record_terminator: csv::RecordTerminator::CRLF,
///     quote: b'"',
///     escape: None,
///     double_quote: true,
/// };
/// assert_eq!(defaults, Default::default());
/// ```
pub struct CsvOptions
{
    /// Field delimiter to expect in the CSV file.
    ///
    /// Corresponds to the `csv::Reader.delimiter` method.
    pub delimiter: u8,
    /// Record terminator to expect in the CSV file.
    ///
    /// Corresponds to the `csv::Reader.record_terminator` method. See `csv::RecordTerminator`.
    pub record_terminator: csv::RecordTerminator,
    /// Field quotation character to expect in the CSV file.
    ///
    /// Corresponds to the `csv::Reader.quote` method.
    pub quote: u8,
    /// Escape character to expect in the CSV file, or `None` for no escape character.
    ///
    /// Corresponds to the `csv::Reader.escape` method.
    pub escape: Option<u8>,
    /// Whether two adjacent quote characters should be interpreted as an escaped quote character.
    ///
    /// Corresponds to the `csv::Reader.double_quote` method.
    pub double_quote: bool
}
|
||||
|
||||
impl Default for CsvOptions
|
||||
{
|
||||
fn default() -> CsvOptions {
|
||||
CsvOptions {
|
||||
delimiter: b',',
|
||||
record_terminator: csv::RecordTerminator::CRLF,
|
||||
quote: b'"',
|
||||
escape: None,
|
||||
double_quote: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Command line interface for running a `Ruleset` against a CSV file.
///
/// Construct one with `Cli::new` or `Cli::new_with_options`, then call `run` to process an
/// input file.
pub struct Cli
{
    /// Settings used when opening the input CSV file.
    options: Options,
    /// Rules applied to every record read from the input file.
    ruleset: Ruleset,
}
|
||||
|
||||
impl Cli
|
||||
{
|
||||
/// Construct a new `Cli` with default options.
|
||||
///
|
||||
/// ```
|
||||
/// use csv_sanity::Ruleset;
|
||||
/// use csv_sanity::cli::{
|
||||
/// Cli
|
||||
/// };
|
||||
///
|
||||
/// let ruleset = Ruleset::new();
|
||||
/// let cli = Cli::new(ruleset);
|
||||
/// ```
|
||||
pub fn new(ruleset: Ruleset) -> Cli {
|
||||
Self::new_with_options(ruleset, Default::default())
|
||||
}
|
||||
|
||||
/// Construct a new `Cli` with the specified options.
|
||||
///
|
||||
/// ```
|
||||
/// use csv_sanity::Ruleset;
|
||||
/// use csv_sanity::cli::{
|
||||
/// Cli,
|
||||
/// Options,
|
||||
/// CsvOptions
|
||||
/// };
|
||||
///
|
||||
/// let ruleset = Ruleset::new();
|
||||
/// let cli = Cli::new_with_options(ruleset, Options {
|
||||
/// csv_options: CsvOptions {
|
||||
/// delimiter: b',',
|
||||
/// .. Default::default()
|
||||
/// },
|
||||
/// .. Default::default()
|
||||
/// });
|
||||
/// ```
|
||||
pub fn new_with_options(ruleset: Ruleset, options: Options) -> Cli {
|
||||
Cli {
|
||||
options: options,
|
||||
ruleset: ruleset,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn run<I: AsRef<Path>, O: AsRef<Path>, E: AsRef<Path>>(&self, input_file_path: I, output_file_name: O, error_file_name: E) {
|
||||
let (mut reader, headers) = self.reader_from_file(input_file_path);
|
||||
|
||||
let mut output_writer = csv::Writer::from_file(output_file_name).expect("Unable to open output file for writing");
|
||||
let mut output_headers = headers.clone();
|
||||
output_headers.insert(0, "Record Number".to_string());
|
||||
output_writer.encode(output_headers).expect("Unable to write to output file");
|
||||
|
||||
let mut error_writer = csv::Writer::from_file(error_file_name).expect("Unable to open error file for writing");
|
||||
let error_headers = vec![
|
||||
"Record Number",
|
||||
"Field Name",
|
||||
"Field Value",
|
||||
"Reason",
|
||||
];
|
||||
error_writer.encode(error_headers).expect("Unable to write to error file");
|
||||
|
||||
for (record_n, record) in reader.records().enumerate() {
|
||||
let original_line_n = record_n + 2; // Plus one for headers and plus one for zero-indexing.
|
||||
let transformed_record: TransformedRecord = match record {
|
||||
Err(e) => {
|
||||
let err = TransformError {
|
||||
field_value: "".to_string(),
|
||||
field_name: "".to_string(),
|
||||
record_n: original_line_n,
|
||||
reason: format!("{}", e),
|
||||
};
|
||||
error_writer.encode(err).expect("Unable to write to error file");
|
||||
continue;
|
||||
},
|
||||
Ok(ref rec) => self.ruleset.apply_rules(&headers, rec, original_line_n)
|
||||
};
|
||||
let record_fields: Vec<Option<String>> = {
|
||||
let mut fs = vec![Some(original_line_n.to_string())];
|
||||
fs.extend(transformed_record.field_values);
|
||||
fs
|
||||
};
|
||||
output_writer.encode(record_fields).expect("Unable to write to output file");
|
||||
for error in transformed_record.errors {
|
||||
error_writer.encode(error).expect("Unable to write to error file");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn reader_from_file<P: AsRef<Path>>(&self, path: P) -> (csv::Reader<File>, Vec<String>) {
|
||||
let mut reader = csv::Reader::from_file(path.as_ref().clone()).map(|r| {
|
||||
// Configure the reader according to the options passed to the Cli constructor.
|
||||
r.has_headers(true)
|
||||
.delimiter(self.options.csv_options.delimiter)
|
||||
.record_terminator(self.options.csv_options.record_terminator)
|
||||
.quote(self.options.csv_options.quote)
|
||||
.escape(self.options.csv_options.escape)
|
||||
.double_quote(self.options.csv_options.double_quote)
|
||||
.flexible(true)
|
||||
}).expect(&format!("Unable to read file {}", path.as_ref().display()));
|
||||
let headers = reader.headers()
|
||||
.expect(&format!("Unable to read headers from input file {}", path.as_ref().display()));
|
||||
(reader, headers)
|
||||
}
|
||||
}
|
||||
36
src/lib.rs
Normal file
36
src/lib.rs
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
#[macro_use]
|
||||
extern crate serde_derive;
|
||||
extern crate serde;
|
||||
extern crate serde_json;
|
||||
extern crate regex;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
extern crate unicode_segmentation;
|
||||
extern crate time;
|
||||
extern crate csv;
|
||||
#[macro_use]
|
||||
extern crate custom_derive;
|
||||
#[macro_use]
|
||||
extern crate newtype_derive;
|
||||
extern crate rustc_serialize;
|
||||
|
||||
mod newtypes;
|
||||
|
||||
pub mod transformer;
|
||||
pub use transformer::{
|
||||
Transformer,
|
||||
TransformResult,
|
||||
TransformResultHelper,
|
||||
TransformError
|
||||
};
|
||||
|
||||
pub mod transformers;
|
||||
|
||||
mod ruleset;
|
||||
pub use ruleset::{
|
||||
Rule,
|
||||
Ruleset,
|
||||
TransformedRecord,
|
||||
};
|
||||
|
||||
pub mod cli;
|
||||
111
src/main.rs
Normal file
111
src/main.rs
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
extern crate csv_sanity;
|
||||
|
||||
extern crate serde_json;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate regex;
|
||||
#[macro_use]
|
||||
extern crate clap;
|
||||
|
||||
use csv_sanity::cli::{
|
||||
self,
|
||||
Cli,
|
||||
};
|
||||
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
use log::{
|
||||
LogRecord,
|
||||
LogLevel,
|
||||
LogMetadata,
|
||||
LogLevelFilter,
|
||||
SetLoggerError
|
||||
};
|
||||
use clap::{
|
||||
App,
|
||||
Arg
|
||||
};
|
||||
|
||||
/// Minimal `log::Log` implementation that prints records to standard output.
struct ConsoleLogger {
    /// Level threshold compared against each record's level in `enabled`.
    log_level: LogLevel
}
|
||||
|
||||
impl log::Log for ConsoleLogger {
|
||||
fn enabled(&self, metadata: &LogMetadata) -> bool {
|
||||
metadata.level() <= self.log_level
|
||||
}
|
||||
|
||||
fn log(&self, record: &LogRecord) {
|
||||
if self.enabled(record.metadata()) {
|
||||
println!("{} - {}", record.level(), record.args())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn init_logging() -> Result<(), SetLoggerError> {
|
||||
log::set_logger(|max_log_level| {
|
||||
max_log_level.set(LogLevelFilter::Info);
|
||||
Box::new(ConsoleLogger { log_level: LogLevel::Info })
|
||||
})
|
||||
}
|
||||
|
||||
fn main() {
|
||||
init_logging().unwrap();
|
||||
|
||||
let matches = App::new("Convert CSV")
|
||||
.version(crate_version!())
|
||||
.author("M. George Hansen <technopolitica@gmail.com>")
|
||||
.about("Apply a set of transformations to the records in a CSV file, attempting to read a much valid information from the file as possible.")
|
||||
.arg(Arg::with_name("INPUT_FILE")
|
||||
.help("CSV file to process")
|
||||
.required(true)
|
||||
.index(1))
|
||||
.arg(Arg::with_name("output")
|
||||
.help("File to output the transformed CSV records. Defaults to ./output.csv")
|
||||
.short("o")
|
||||
.long("output")
|
||||
.takes_value(true))
|
||||
.arg(Arg::with_name("error_output")
|
||||
.help("File to output errors in CSV format. Defaults to ./errors.csv")
|
||||
.short("e")
|
||||
.long("error_output")
|
||||
.takes_value(true))
|
||||
.arg(Arg::with_name("ruleset")
|
||||
.help("JSON file containing the ruleset to apply. Defaults to ./ruleset.json")
|
||||
.short("r")
|
||||
.long("ruleset")
|
||||
.takes_value(true))
|
||||
.get_matches();
|
||||
|
||||
let ruleset_file_path = Path::new(matches.value_of("ruleset").unwrap_or("ruleset.json"));
|
||||
let ruleset_file = match File::open(ruleset_file_path) {
|
||||
Ok(f) => f,
|
||||
Err(e) => exit_with_error(&format!("unable to read ruleset file {}: {}", ruleset_file_path.display(), e))
|
||||
};
|
||||
let ruleset = match serde_json::from_reader(ruleset_file) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
exit_with_error(&format!("failed to parse ruleset from {}: {}", ruleset_file_path.display(), e));
|
||||
}
|
||||
};
|
||||
|
||||
let cli_app = Cli::new_with_options(ruleset, cli::Options {
|
||||
csv_options: cli::CsvOptions {
|
||||
delimiter: b'\t',
|
||||
.. Default::default()
|
||||
},
|
||||
.. Default::default()
|
||||
});
|
||||
|
||||
// NOTE: Required arguments are validated by clap, so we should be safe to use expect here.
|
||||
let input_file_name = matches.value_of("INPUT_FILE").expect("INPUT_FILE argument could not be found!");
|
||||
let output_file_name = matches.value_of("output_file").unwrap_or("output.csv");
|
||||
let error_file_name = matches.value_of("error_file").unwrap_or("errors.csv");
|
||||
cli_app.run(input_file_name, output_file_name, error_file_name);
|
||||
}
|
||||
|
||||
fn exit_with_error(error_msg: &str) -> !
|
||||
{
|
||||
error!("{}", error_msg);
|
||||
std::process::exit(1);
|
||||
}
|
||||
59
src/newtypes.rs
Normal file
59
src/newtypes.rs
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
use std::hash::{
|
||||
Hash,
|
||||
Hasher,
|
||||
};
|
||||
use regex;
|
||||
use serde::{
|
||||
Serialize,
|
||||
Serializer,
|
||||
Deserialize,
|
||||
Deserializer,
|
||||
};
|
||||
|
||||
// Newtype around `regex::Regex`. The impls in this module compare, hash, and serialize it by
// its pattern string, and deserialize it by compiling a pattern string.
custom_derive! {
    #[derive(NewtypeFrom, NewtypeDeref, NewtypeDerefMut, Clone, NewtypeDisplay, NewtypeDebug)]
    pub struct Regex(regex::Regex);
}
|
||||
|
||||
impl PartialEq for Regex {
|
||||
fn eq(&self, other: &Regex) -> bool
|
||||
{
|
||||
self.0.as_str() == other.0.as_str()
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for Regex {}
|
||||
|
||||
impl Hash for Regex {
|
||||
fn hash<H>(&self, state: &mut H)
|
||||
where H: Hasher {
|
||||
self.as_str().hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Regex
|
||||
{
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where S: Serializer {
|
||||
let Regex(ref regex) = *self;
|
||||
regex.as_str().serialize(serializer)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for Regex
|
||||
{
|
||||
fn deserialize<D>(deserializer: D) -> Result<Regex, D::Error>
|
||||
where D: Deserializer<'de>
|
||||
{
|
||||
use serde::de::{Unexpected, Error};
|
||||
let string: Result<String, D::Error> = Deserialize::deserialize(deserializer);
|
||||
string.and_then(|s| {
|
||||
regex::Regex::new(&s)
|
||||
.map(|r| Regex(r))
|
||||
.map_err(|e| {
|
||||
let message: &str = &format!("invalid regex string: {}", e);
|
||||
D::Error::invalid_value(Unexpected::Str(&s), &message)
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
345
src/ruleset.rs
Normal file
345
src/ruleset.rs
Normal file
|
|
@ -0,0 +1,345 @@
|
|||
use Transformer;
|
||||
use transformer::{
|
||||
TransformResult,
|
||||
TransformError,
|
||||
};
|
||||
use transformers::{
|
||||
Transformers,
|
||||
TrimTransformer,
|
||||
NoneTransformer,
|
||||
};
|
||||
|
||||
use std::hash::{
|
||||
Hash,
|
||||
Hasher,
|
||||
};
|
||||
use std::iter::FromIterator;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::{
|
||||
BinaryHeap,
|
||||
HashSet,
|
||||
};
|
||||
use std::error;
|
||||
use std::fmt::{
|
||||
self,
|
||||
Formatter,
|
||||
Display,
|
||||
};
|
||||
|
||||
/// Applicability of a `Rule` determining which CSV record's fields it can be applied to.
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
pub enum Applicability {
    /// Applicable to all CSV record fields.
    Global,
    /// Applicable to a subset of a CSV record's fields, specified by field name.
    Fields {
        /// Names of the fields (CSV headers) the rule applies to.
        field_names: HashSet<String>
    }
}
|
||||
|
||||
impl Hash for Applicability {
|
||||
fn hash<H>(&self, state: &mut H)
|
||||
where H: Hasher {
|
||||
use self::Applicability::*;
|
||||
match *self {
|
||||
Global => (self as *const Applicability).hash(state), // FIXME: Is this the correct way to hash an empty enum variant?
|
||||
Fields { ref field_names } => field_names.iter().collect::<Vec<&String>>().hash(state)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when `priority` is 0.
///
/// Used as the `skip_serializing_if` predicate for `Rule::priority`.
fn priority_is_default(priority: &isize) -> bool {
    *priority == 0
}
|
||||
|
||||
/// A `Transformer` paired with `Applicability` and a priority which can be applied to fields in a
/// CSV record.
#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
pub struct Rule
{
    /// Which fields this rule is applied to.
    applicability: Applicability,
    /// Transformation performed on applicable fields.
    transformer: Transformers,
    /// Ordering key used by the `Ord` impl. Defaults to 0 when absent from the serialized form
    /// and is omitted from it when equal to 0.
    #[serde(default, skip_serializing_if="priority_is_default")]
    priority: isize
}
|
||||
|
||||
impl Rule
|
||||
{
|
||||
/// Construct a new `Rule` whoe `Transformer` is applicable to one or more CSV record's fields
|
||||
/// referenced by name with the default priority of 0.
|
||||
///
|
||||
/// # Examples
|
||||
/// ```
|
||||
/// use csv_sanity::Rule;
|
||||
/// use csv_sanity::transformers::*;
|
||||
///
|
||||
/// let rule = Rule::for_fields(&["First Name", "Last Name"], Transformers::Capitalize(
|
||||
/// CapitalizeTransformer::new()
|
||||
/// ));
|
||||
/// ```
|
||||
pub fn for_fields(field_names: &[&str], transformer: Transformers) -> Rule {
|
||||
Self::for_fields_with_priority(field_names, transformer, Default::default())
|
||||
}
|
||||
|
||||
/// Construct a new `Rule` whoe `Transformer` is applicable to one or more CSV record's fields
|
||||
/// referenced by name with the specified priority.
|
||||
///
|
||||
/// # Examples
|
||||
/// ```
|
||||
/// use csv_sanity::Rule;
|
||||
/// use csv_sanity::transformers::*;
|
||||
///
|
||||
/// let rule = Rule::for_fields_with_priority(&["Fist Name", "Last Name"], Transformers::Capitalize(
|
||||
/// CapitalizeTransformer::new()
|
||||
/// ), 10);
|
||||
/// ```
|
||||
pub fn for_fields_with_priority(field_names: &[&str], transformer: Transformers, priority: isize) -> Rule {
|
||||
Rule {
|
||||
applicability: Applicability::Fields { field_names: field_names.iter().map(|s| s.to_string()).collect() },
|
||||
transformer: transformer,
|
||||
priority: priority
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct a new `Rule` applicable to all of a CSV record's fields with the default priority
|
||||
/// of 0.
|
||||
///
|
||||
/// # Examples
|
||||
/// ```
|
||||
/// use csv_sanity::Rule;
|
||||
/// use csv_sanity::transformers::*;
|
||||
///
|
||||
/// let rule = Rule::global(Transformers::Capitalize(
|
||||
/// CapitalizeTransformer::new()
|
||||
/// ));
|
||||
/// ```
|
||||
pub fn global(transformer: Transformers) -> Rule {
|
||||
Self::global_with_priority(transformer, Default::default())
|
||||
}
|
||||
|
||||
/// Construct a new `Rule` applicable to all of a CSV record's fields with the specified
|
||||
/// priority.
|
||||
///
|
||||
/// # Examples
|
||||
/// ```
|
||||
/// use csv_sanity::Rule;
|
||||
/// use csv_sanity::transformers::*;
|
||||
///
|
||||
/// let rule = Rule::global_with_priority(Transformers::Capitalize(
|
||||
/// CapitalizeTransformer::new()
|
||||
/// ), 10);
|
||||
/// ```
|
||||
pub fn global_with_priority(transformer: Transformers, priority: isize) -> Rule {
|
||||
Rule {
|
||||
applicability: Applicability::Global,
|
||||
transformer: transformer,
|
||||
priority: priority
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply this rule to a CSV record's field, returning the resulting `TransformResult`.
|
||||
///
|
||||
/// # Examples
|
||||
/// ```
|
||||
/// use csv_sanity::Rule;
|
||||
/// use csv_sanity::transformers::*;
|
||||
///
|
||||
/// let field = "JOHN";
|
||||
/// let field_name = "First Name";
|
||||
///
|
||||
/// let rule = Rule::for_fields(&["First Name", "Last Name"], Transformers::Capitalize(
|
||||
/// CapitalizeTransformer::new()
|
||||
/// ));
|
||||
/// rule.apply(field, field_name, 1);
|
||||
/// ```
|
||||
pub fn apply(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult {
|
||||
// XXX: Does the applicability check belong inside the apply method? Or should the caller
|
||||
// decide?
|
||||
match self.applicability {
|
||||
Applicability::Global => self.transformer.transform(field_value, field_name, record_n),
|
||||
Applicability::Fields { ref field_names } if field_names.contains(&field_name.to_string()) => {
|
||||
self.transformer.transform(field_value, field_name, record_n)
|
||||
},
|
||||
_ => Ok(Some(field_value.to_string()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for Rule
|
||||
{
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
other.priority.cmp(&self.priority)
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for Rule
|
||||
{
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
/// An ordered set of `Rule`s sorted by priority.
///
/// # Examples
/// ```
/// use csv_sanity::{
///     Ruleset,
///     Rule,
///     TransformedRecord,
/// };
/// use csv_sanity::transformers::*;
/// let ruleset = {
///     let mut r = Ruleset::new();
///     r.add_rule(Rule::for_fields(&["First Name", "Last Name"], Transformers::Capitalize(
///         CapitalizeTransformer::new()
///     )));
///     r.add_rule(Rule::for_fields(&["Email"], Transformers::Email(
///         EmailTransformer::new()
///     )));
///     r
/// };
/// let headers = vec!["Id", "First Name", "Last Name", "Email"].iter().map(|s| s.to_string()).collect();
/// let record = vec!["1", " JOHN", "SNOW ", "\t JSNOW@EXAMPLE.COM "].iter().map(|s| s.to_string()).collect();
/// let transformed_record = ruleset.apply_rules(&headers, &record, 1);
/// assert_eq!(TransformedRecord {
///     field_values: vec!["1", "John", "Snow", "jsnow@example.com"].iter().map(|s| Some(s.to_string())).collect(),
///     errors: Vec::new(),
/// }, transformed_record);
/// ```
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Ruleset {
    // Rules are kept in a binary heap keyed by `Rule`'s `Ord` impl. Note that
    // `BinaryHeap::iter` visits elements in arbitrary order, not sorted order.
    rules: BinaryHeap<Rule>
}
|
||||
|
||||
impl Ruleset {
|
||||
/// Construct a new `Ruleset` with a default `NoneTransformer` and `TrimTransformer` global
|
||||
/// rules.
|
||||
///
|
||||
/// The default trim and none rules should be appropriate for most CSV files. For CSV files
|
||||
/// where these default rules are not desired use the `Ruleset::without_default_rules` method.
|
||||
pub fn new() -> Ruleset {
|
||||
let mut ruleset = Self::without_default_rules();
|
||||
// Add a default trim rule and blank rule to match empty fields.
|
||||
ruleset.add_rule(Rule::global_with_priority(Transformers::None(NoneTransformer::with_blank_matcher()), -10));
|
||||
ruleset.add_rule(Rule::global_with_priority(Transformers::Trim(TrimTransformer::new()), -10));
|
||||
ruleset
|
||||
}
|
||||
|
||||
/// Construct a new `Ruleset` without any of the default rules.
|
||||
pub fn without_default_rules() -> Ruleset {
|
||||
Ruleset {
|
||||
rules: BinaryHeap::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a `Rule` to the this ruleset.
|
||||
pub fn add_rule(&mut self, rule: Rule) {
|
||||
self.rules.push(rule);
|
||||
}
|
||||
|
||||
/// Validate this ruleset against a CSV file by comparing it's `Rule`s against the headers.
|
||||
pub fn validate_rules(&self, headers: &Vec<String>) -> Result<(), Vec<ValidationError>> {
|
||||
let mut errors = Vec::new();
|
||||
for rule in self.rules.iter() {
|
||||
if let Applicability::Fields { ref field_names } = rule.applicability {
|
||||
let header_set = HashSet::<String>::from_iter(headers.clone());
|
||||
let field_set = HashSet::<String>::from_iter(field_names.clone());
|
||||
let diff: HashSet<String> = field_set.difference(&header_set).cloned().collect();
|
||||
if diff.len() > 0 {
|
||||
// FIXME: We should have a better way to construct a ruleset that uses Result
|
||||
// instead of panic! here.
|
||||
errors.push(
|
||||
ValidationError {
|
||||
reason: format!("The following fields were not found in headers: '{:?}'", diff),
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
if errors.is_empty() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(errors)
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply this `Ruleset` to a record from a CSV file.
|
||||
pub fn apply_rules(&self, headers: &Vec<String>, fields: &Vec<String>, record_n: usize) -> TransformedRecord {
|
||||
let expected_n_fields = headers.len();
|
||||
|
||||
let mut errors: Vec<TransformError> = Vec::new();
|
||||
let mut transformed_fields: Vec<Option<String>> = Vec::new();
|
||||
for (field_n, field_value) in fields.iter().enumerate() {
|
||||
if field_n < expected_n_fields {
|
||||
let field_name = &headers[field_n];
|
||||
let mut transformed_field_value = Some(field_value.clone());
|
||||
// Try each rule in order of priority and test to see if it is applicable.
|
||||
for rule in self.rules.iter() {
|
||||
let new_value = match transformed_field_value {
|
||||
Some(ref fv) => {
|
||||
let transform_result = rule.apply(fv, &field_name, record_n);
|
||||
match transform_result {
|
||||
Ok(tfv) => tfv,
|
||||
Err(e) => {
|
||||
errors.push(e);
|
||||
None
|
||||
}
|
||||
}
|
||||
},
|
||||
// The last transformer returned None, so we can short circuit and just
|
||||
// return None for the field value.
|
||||
None => break
|
||||
};
|
||||
transformed_field_value = new_value;
|
||||
}
|
||||
transformed_fields.insert(field_n, transformed_field_value);
|
||||
} else {
|
||||
errors.push(
|
||||
TransformError {
|
||||
field_value: field_value.to_string(),
|
||||
field_name: field_n.to_string(),
|
||||
record_n: record_n,
|
||||
reason: format!("found {} header fields but record had extra field at position {}", expected_n_fields, field_n)
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
TransformedRecord {
|
||||
field_values: transformed_fields,
|
||||
errors: errors,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Error for when a `Ruleset` does not validate against a CSV file.
#[derive(Serialize, Deserialize, Clone, PartialEq, Eq, Hash, Debug)]
pub struct ValidationError {
    /// Human-readable explanation; also the `Display` output and the error description.
    reason: String,
}
|
||||
|
||||
impl Display for ValidationError
|
||||
{
|
||||
fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
|
||||
write!(formatter, "{}", self.reason)
|
||||
}
|
||||
}
|
||||
|
||||
impl error::Error for ValidationError
|
||||
{
|
||||
fn description(&self) -> &str {
|
||||
&self.reason
|
||||
}
|
||||
}
|
||||
|
||||
/// A single processed and transformed record.
#[derive(Serialize, Deserialize, Eq, PartialEq, Hash, Debug)]
pub struct TransformedRecord {
    /// Transformed fields for the record.
    ///
    /// Empty fields are explicitly encoded as `None` values.
    pub field_values: Vec<Option<String>>,
    /// Errors that were encountered during transformation, if any.
    pub errors: Vec<TransformError>,
}
|
||||
91
src/transformer.rs
Normal file
91
src/transformer.rs
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
//! Traits and types that define transformations on CSV record fields.
|
||||
|
||||
use std::result;
|
||||
use std::error;
|
||||
use std::fmt::{
|
||||
self,
|
||||
Formatter,
|
||||
Display,
|
||||
};
|
||||
|
||||
/// `Result` for the transformation of a CSV record's field, either an `Option<String>` if
|
||||
/// successfully transformed or a `TransformError` if unsuccessful.
|
||||
pub type TransformResult = result::Result<Option<String>, TransformError>;
|
||||
|
||||
/// Helper trait with a few useful utility methods for constructing `TransformResult`.
|
||||
pub trait TransformResultHelper
|
||||
{
|
||||
/// Construct a `TransformResult` that represents a successful transformation of a CSV record's
|
||||
/// field with a non-empty value.
|
||||
fn present(value: &str) -> TransformResult {
|
||||
Ok(Some(value.to_string()))
|
||||
}
|
||||
|
||||
/// Construct a `TransformResult` that represents a successful tranformation of a CSV record's
|
||||
/// field with an empty value.
|
||||
fn excluded() -> TransformResult {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Construct a `TransformResult` that represents a failed transformation of a CSV record's
|
||||
/// field with a descritive error reason.
|
||||
///
|
||||
/// An error reason should be a short, single sentence without punctuation or capitization,
|
||||
/// e.g. "not a valid email address" instead of "The email address was invalid.".
|
||||
///
|
||||
/// ```
|
||||
/// use csv_sanity::transformer::{
|
||||
/// TransformResult,
|
||||
/// TransformError,
|
||||
/// TransformResultHelper,
|
||||
/// };
|
||||
///
|
||||
/// let result = TransformResult::error("jak,.@hot mail.com", "Email", 0, "not a valid email address");
|
||||
/// assert_eq!(result, Err(TransformError {
|
||||
/// field_value: "jak,.@hot mail.com".to_string(),
|
||||
/// field_name: "Email".to_string(),
|
||||
/// record_n: 0,
|
||||
/// reason: "not a valid email address".to_string(),
|
||||
/// }));
|
||||
/// ```
|
||||
fn error(field_value: &str, field_name: &str, record_n: usize, reason: &str) -> TransformResult {
|
||||
Err(
|
||||
TransformError {
|
||||
field_value: field_value.to_string(),
|
||||
field_name: field_name.to_string(),
|
||||
record_n: record_n,
|
||||
reason: reason.to_string(),
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Makes the helper constructors callable as `TransformResult::present(..)`,
// `TransformResult::excluded()` and `TransformResult::error(..)`.
impl TransformResultHelper for TransformResult {}
|
||||
|
||||
/// A transformation that can be applied to a single field of a CSV record.
pub trait Transformer
{
    /// Transform `field_value`, the value of the field named `field_name` in record number
    /// `record_n`.
    ///
    /// Returns `Ok(Some(..))` with the transformed value, `Ok(None)` for an empty value, or a
    /// `TransformError` if the transformation failed.
    fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult;
}
|
||||
|
||||
/// Describes why the transformation of a CSV record's field failed.
// NOTE(review): presumably encoded in field declaration order, which lines up with the
// "Record Number, Field Name, Field Value, Reason" header row written by the CLI's error
// writer -- confirm before reordering fields.
#[derive(RustcEncodable, Deserialize, Serialize, Clone, PartialEq, Eq, Hash, Debug)]
pub struct TransformError
{
    /// Number of the record the field belongs to.
    pub record_n: usize,
    /// Name of the field that failed to transform.
    pub field_name: String,
    /// Value of the field that failed to transform.
    pub field_value: String,
    /// Short explanation of the failure.
    pub reason: String,
}
|
||||
|
||||
impl Display for TransformError
|
||||
{
|
||||
fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
|
||||
write!(formatter, "failed to transform field: {}", self.reason)
|
||||
}
|
||||
}
|
||||
|
||||
impl error::Error for TransformError
|
||||
{
|
||||
fn description(&self) -> &str {
|
||||
&self.reason
|
||||
}
|
||||
}
|
||||
41
src/transformers/capitalize.rs
Normal file
41
src/transformers/capitalize.rs
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
use Transformer;
|
||||
use transformer::{
|
||||
TransformResultHelper,
|
||||
TransformResult
|
||||
};
|
||||
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
pub fn capitalize(string: &str) -> String
|
||||
{
|
||||
string.unicode_words()
|
||||
.map(capitalize_word).collect::<Vec<String>>()
|
||||
.join(" ")
|
||||
}
|
||||
|
||||
/// Upper-case the first character of `word` and lower-case every following character.
///
/// Case mapping is done character by character, so a character whose mapping is several
/// characters long expands accordingly.
fn capitalize_word(word: &str) -> String
{
    let mut letters = word.chars();
    let mut capitalized = String::new();
    if let Some(initial) = letters.next() {
        capitalized.extend(initial.to_uppercase());
    }
    for letter in letters {
        capitalized.extend(letter.to_lowercase());
    }
    capitalized
}
|
||||
|
||||
/// `Transformer` that rewrites a field with `capitalize`. It has no configuration.
#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
pub struct CapitalizeTransformer {}
|
||||
|
||||
impl CapitalizeTransformer
|
||||
{
|
||||
pub fn new() -> CapitalizeTransformer
|
||||
{
|
||||
CapitalizeTransformer {}
|
||||
}
|
||||
}
|
||||
|
||||
impl Transformer for CapitalizeTransformer
|
||||
{
|
||||
fn transform(&self, field_value: &str, _: &str, _: usize) -> TransformResult
|
||||
{
|
||||
let result = capitalize(field_value);
|
||||
TransformResult::present(&result)
|
||||
}
|
||||
}
|
||||
37
src/transformers/choice.rs
Normal file
37
src/transformers/choice.rs
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
use Transformer;
|
||||
use transformer::{
|
||||
TransformResultHelper,
|
||||
TransformResult
|
||||
};
|
||||
|
||||
/// `Transformer` that only accepts field values found in a fixed list of choices.
#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
pub struct ChoiceTransformer {
    /// Accepted values; a field must equal one of them exactly.
    choices: Vec<String>,
}
|
||||
|
||||
impl ChoiceTransformer
|
||||
{
|
||||
pub fn new(choices: Vec<String>) -> ChoiceTransformer
|
||||
{
|
||||
ChoiceTransformer {
|
||||
choices: choices,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Transformer for ChoiceTransformer
|
||||
{
|
||||
fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult
|
||||
{
|
||||
if self.choices.contains(&field_value.to_string()) {
|
||||
TransformResult::present(&field_value)
|
||||
} else {
|
||||
TransformResult::error(
|
||||
field_value,
|
||||
field_name,
|
||||
record_n,
|
||||
&format!("not in valid choices {:?}", self.choices)
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
41
src/transformers/date.rs
Normal file
41
src/transformers/date.rs
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
use Transformer;
|
||||
use transformer::{
|
||||
TransformResultHelper,
|
||||
TransformResult
|
||||
};
|
||||
|
||||
use time::{
|
||||
strptime
|
||||
};
|
||||
|
||||
/// `Transformer` that parses a date using one of several input formats and rewrites it in a
/// single output format.
#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
pub struct DateTransformer {
    /// `strptime` format strings, tried in order until one parses the field.
    input_formats: Vec<String>,
    /// `strftime` format string used to render the parsed date.
    output_format: String
}
|
||||
|
||||
impl DateTransformer {
|
||||
pub fn new(input_formats: Vec<String>, output_format: &str) -> DateTransformer {
|
||||
DateTransformer {
|
||||
input_formats: input_formats,
|
||||
output_format: output_format.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_iso8601_output(input_formats: Vec<String>) -> DateTransformer {
|
||||
Self::new(input_formats, "%F")
|
||||
}
|
||||
}
|
||||
|
||||
impl Transformer for DateTransformer {
|
||||
fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult {
|
||||
for format in self.input_formats.iter() {
|
||||
if let Ok(time) = strptime(field_value, &format) {
|
||||
return TransformResult::present(
|
||||
&format!("{}", time.strftime(&self.output_format).unwrap())
|
||||
);
|
||||
}
|
||||
}
|
||||
TransformResult::error(field_value, field_name, record_n, "unable to parse as date")
|
||||
}
|
||||
}
|
||||
30
src/transformers/email.rs
Normal file
30
src/transformers/email.rs
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
use Transformer;
|
||||
use transformer::{
|
||||
TransformResultHelper,
|
||||
TransformResult
|
||||
};
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
lazy_static! {
    // Case-insensitive pattern that an entire field value must match to be accepted as
    // an email address: a dot-separated local part, `@`, then two or more dot-separated
    // domain labels.
    static ref EMAIL_REGEX: Regex = {
        let pattern = r"(?i)\A[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\z";
        Regex::new(pattern).unwrap()
    };
}
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
|
||||
pub struct EmailTransformer {}
|
||||
|
||||
impl EmailTransformer {
|
||||
pub fn new() -> EmailTransformer {
|
||||
EmailTransformer {}
|
||||
}
|
||||
}
|
||||
|
||||
impl Transformer for EmailTransformer {
|
||||
fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult {
|
||||
if EMAIL_REGEX.is_match(field_value) {
|
||||
TransformResult::present(&field_value.to_lowercase())
|
||||
} else {
|
||||
TransformResult::error(field_value, field_name, record_n, "invalid email address")
|
||||
}
|
||||
}
|
||||
}
|
||||
76
src/transformers/mod.rs
Normal file
76
src/transformers/mod.rs
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
use transformer::{
|
||||
Transformer,
|
||||
TransformResult,
|
||||
};
|
||||
|
||||
mod trim;
|
||||
pub use self::trim::TrimTransformer;
|
||||
|
||||
mod none;
|
||||
pub use self::none::NoneTransformer;
|
||||
|
||||
mod regex;
|
||||
pub use self::regex::{
|
||||
RegexTransformer,
|
||||
RegexMatchTransformer
|
||||
};
|
||||
|
||||
mod capitalize;
|
||||
pub use self::capitalize::{
|
||||
CapitalizeTransformer,
|
||||
capitalize
|
||||
};
|
||||
|
||||
mod email;
|
||||
pub use self::email::EmailTransformer;
|
||||
|
||||
mod number;
|
||||
pub use self::number::NumberTransformer;
|
||||
|
||||
mod date;
|
||||
pub use self::date::DateTransformer;
|
||||
|
||||
mod choice;
|
||||
pub use self::choice::ChoiceTransformer;
|
||||
|
||||
mod zipcode;
|
||||
pub use self::zipcode::ZipcodeTransformer;
|
||||
|
||||
mod phone_number;
|
||||
pub use self::phone_number::PhoneNumberTransformer;
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
|
||||
pub enum Transformers
|
||||
{
|
||||
Trim(TrimTransformer),
|
||||
None(NoneTransformer),
|
||||
Regex(RegexTransformer),
|
||||
RegexMatch(RegexMatchTransformer),
|
||||
Capitalize(CapitalizeTransformer),
|
||||
Email(EmailTransformer),
|
||||
Number(NumberTransformer),
|
||||
Date(DateTransformer),
|
||||
Choice(ChoiceTransformer),
|
||||
Zipcode(ZipcodeTransformer),
|
||||
PhoneNumber(PhoneNumberTransformer),
|
||||
}
|
||||
|
||||
impl Transformer for Transformers {
|
||||
fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult {
|
||||
use self::Transformers::*;
|
||||
|
||||
match *self {
|
||||
Trim(ref t) => t.transform(field_value, field_name, record_n),
|
||||
None(ref t) => t.transform(field_value, field_name, record_n),
|
||||
Regex(ref t) => t.transform(field_value, field_name, record_n),
|
||||
RegexMatch(ref t) => t.transform(field_value, field_name, record_n),
|
||||
Capitalize(ref t) => t.transform(field_value, field_name, record_n),
|
||||
Email(ref t) => t.transform(field_value, field_name, record_n),
|
||||
Number(ref t) => t.transform(field_value, field_name, record_n),
|
||||
Date(ref t) => t.transform(field_value, field_name, record_n),
|
||||
Choice(ref t) => t.transform(field_value, field_name, record_n),
|
||||
Zipcode(ref t) => t.transform(field_value, field_name, record_n),
|
||||
PhoneNumber(ref t) => t.transform(field_value, field_name, record_n)
|
||||
}
|
||||
}
|
||||
}
|
||||
34
src/transformers/none.rs
Normal file
34
src/transformers/none.rs
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
use Transformer;
|
||||
use transformer::{
|
||||
TransformResultHelper,
|
||||
TransformResult
|
||||
};
|
||||
use newtypes::Regex;
|
||||
|
||||
use regex;
|
||||
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
|
||||
pub struct NoneTransformer {
|
||||
regex: Regex
|
||||
}
|
||||
|
||||
impl NoneTransformer {
|
||||
pub fn new(regex: regex::Regex) -> NoneTransformer {
|
||||
NoneTransformer { regex: Regex::from(regex) }
|
||||
}
|
||||
|
||||
pub fn with_blank_matcher() -> NoneTransformer {
|
||||
Self::new(regex::Regex::new(r"\A(?:[:cntrl:]|\s)*\z").unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
impl Transformer for NoneTransformer {
|
||||
fn transform(&self, field_value: &str, _: &str, _: usize) -> TransformResult {
|
||||
if self.regex.is_match(field_value) {
|
||||
TransformResult::excluded()
|
||||
} else {
|
||||
TransformResult::present(field_value)
|
||||
}
|
||||
}
|
||||
}
|
||||
30
src/transformers/number.rs
Normal file
30
src/transformers/number.rs
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
use Transformer;
|
||||
use transformer::{
|
||||
TransformResultHelper,
|
||||
TransformResult
|
||||
};
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
lazy_static! {
    // Matches an entire value that is a non-negative integer without leading zeros:
    // either a lone `0` or a non-zero digit followed by any number of digits.
    // The group is non-capturing (`(?:`); the previous `(:?` spelling instead made a
    // leading colon optional and therefore accepted ":0".
    static ref INTEGER_REGEX: Regex = Regex::new(r"\A(?:0|[1-9]\d*)\z").unwrap();
}
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
|
||||
pub struct NumberTransformer { }
|
||||
|
||||
impl NumberTransformer {
|
||||
pub fn match_integer() -> NumberTransformer {
|
||||
NumberTransformer { }
|
||||
}
|
||||
}
|
||||
|
||||
impl Transformer for NumberTransformer {
|
||||
fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult {
|
||||
if INTEGER_REGEX.is_match(field_value) {
|
||||
TransformResult::present(field_value)
|
||||
} else {
|
||||
TransformResult::error(field_value, field_name, record_n, "not a valid number")
|
||||
}
|
||||
}
|
||||
}
|
||||
34
src/transformers/phone_number.rs
Normal file
34
src/transformers/phone_number.rs
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
use Transformer;
|
||||
use transformer::{
|
||||
TransformResultHelper,
|
||||
TransformResult
|
||||
};
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
lazy_static! {
    // Matches an entire value shaped like a NANP phone number: an optional `1` or `+1`
    // prefix, then a three-digit area code, a three-digit exchange code and a four-digit
    // subscriber number, with any non-digit characters allowed between the groups.
    static ref NANP_REGEX: Regex = {
        let pattern = r"\A(?:\+?1)?\D*\(?(?P<area>\d{3})\)?\D*(?P<exchange>\d{3})\D*(?P<subscriber>\d{4})\z";
        Regex::new(pattern).unwrap()
    };
}
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
|
||||
pub struct PhoneNumberTransformer { }
|
||||
|
||||
impl PhoneNumberTransformer {
|
||||
pub fn expect_nanp_format() -> PhoneNumberTransformer {
|
||||
PhoneNumberTransformer { }
|
||||
}
|
||||
}
|
||||
|
||||
impl Transformer for PhoneNumberTransformer {
|
||||
fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult {
|
||||
if let Some(captures) = NANP_REGEX.captures(field_value) {
|
||||
let area_code = captures.name("area").unwrap().as_str();
|
||||
let exchange_code = captures.name("exchange").unwrap().as_str();
|
||||
let subscriber_number = captures.name("subscriber").unwrap().as_str();
|
||||
let phone_number = format!("+1 {} {} {}", area_code, exchange_code, subscriber_number);
|
||||
TransformResult::present(&phone_number)
|
||||
} else {
|
||||
TransformResult::error(field_value, field_name, record_n, "not a valid NANP format phone number")
|
||||
}
|
||||
}
|
||||
}
|
||||
88
src/transformers/regex.rs
Normal file
88
src/transformers/regex.rs
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
use Transformer;
|
||||
use transformer::{
|
||||
TransformResultHelper,
|
||||
TransformResult
|
||||
};
|
||||
use newtypes::Regex;
|
||||
|
||||
use regex;
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
|
||||
pub struct RegexTransformer
|
||||
{
|
||||
regex: Regex,
|
||||
template: String
|
||||
}
|
||||
|
||||
impl RegexTransformer
|
||||
{
|
||||
pub fn new(regex: regex::Regex, template: &str) -> RegexTransformer {
|
||||
RegexTransformer {
|
||||
regex: Regex::from(regex),
|
||||
template: template.to_string()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Transformer for RegexTransformer
|
||||
{
|
||||
fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult {
|
||||
if let Some(captures) = self.regex.captures(field_value) {
|
||||
let mut expansion = String::new();
|
||||
captures.expand(&self.template, &mut expansion);
|
||||
TransformResult::present(&expansion)
|
||||
} else {
|
||||
TransformResult::error(
|
||||
field_value,
|
||||
field_name,
|
||||
record_n,
|
||||
&format!("did not match pattern {}", self.regex)
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
|
||||
pub struct RegexMatchTransformer
|
||||
{
|
||||
regex: Regex,
|
||||
negate: bool
|
||||
}
|
||||
|
||||
impl RegexMatchTransformer
|
||||
{
|
||||
pub fn matching(regex: regex::Regex) -> RegexMatchTransformer {
|
||||
RegexMatchTransformer {
|
||||
regex: Regex::from(regex),
|
||||
negate: false
|
||||
}
|
||||
}
|
||||
|
||||
pub fn not_matching(regex: regex::Regex) -> RegexMatchTransformer {
|
||||
RegexMatchTransformer {
|
||||
regex: Regex::from(regex),
|
||||
negate: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Transformer for RegexMatchTransformer
|
||||
{
|
||||
fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult {
|
||||
let mut is_match = self.regex.is_match(field_value);
|
||||
if self.negate {
|
||||
is_match = !is_match;
|
||||
}
|
||||
|
||||
if is_match {
|
||||
TransformResult::present(field_value)
|
||||
} else {
|
||||
let reason = if self.negate {
|
||||
format!("matched exclusionary pattern {}", self.regex)
|
||||
} else {
|
||||
format!("did not match pattern {}", self.regex)
|
||||
};
|
||||
TransformResult::error(field_value, field_name, record_n, &reason)
|
||||
}
|
||||
}
|
||||
}
|
||||
20
src/transformers/trim.rs
Normal file
20
src/transformers/trim.rs
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
use Transformer;
|
||||
use transformer::{
|
||||
TransformResultHelper,
|
||||
TransformResult
|
||||
};
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
|
||||
pub struct TrimTransformer {}
|
||||
|
||||
impl TrimTransformer {
|
||||
pub fn new() -> TrimTransformer {
|
||||
TrimTransformer {}
|
||||
}
|
||||
}
|
||||
|
||||
impl Transformer for TrimTransformer {
|
||||
fn transform(&self, field_value: &str, _: &str, _: usize) -> TransformResult {
|
||||
TransformResult::present(field_value.trim())
|
||||
}
|
||||
}
|
||||
37
src/transformers/zipcode.rs
Normal file
37
src/transformers/zipcode.rs
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
use Transformer;
|
||||
use transformer::{
|
||||
TransformResultHelper,
|
||||
TransformResult
|
||||
};
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
lazy_static! {
    // Matches an entire value made of a five-digit base code (group 1), any run of
    // non-digit characters, and an optional four-digit extension (group 2).
    static ref ZIP_REGEX: Regex = {
        let pattern = r"\A(\d{5})\D*(?:(\d{4}))?\z";
        Regex::new(pattern).unwrap()
    };
}
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
|
||||
pub struct ZipcodeTransformer { }
|
||||
|
||||
impl ZipcodeTransformer {
|
||||
pub fn new() -> ZipcodeTransformer {
|
||||
ZipcodeTransformer { }
|
||||
}
|
||||
}
|
||||
|
||||
impl Transformer for ZipcodeTransformer {
|
||||
fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult {
|
||||
if let Some(captures) = ZIP_REGEX.captures(field_value) {
|
||||
let base_code = captures.get(1).unwrap();
|
||||
let plus_four_code = captures.get(2);
|
||||
let zipcode = if let Some(pfc) = plus_four_code {
|
||||
format!("{}-{}", base_code.as_str(), pfc.as_str())
|
||||
} else {
|
||||
base_code.as_str().to_string()
|
||||
};
|
||||
TransformResult::present(&zipcode)
|
||||
} else {
|
||||
TransformResult::error(field_value, field_name, record_n, "not a valid zipcode")
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue