Initial commit

2019-02-06 12:24:39 -08:00 · 2019-02-06 12:24:39 -08:00 · 26c5433d16
commit 26c5433d16
21 changed files with 1968 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,2 @@
+target
+Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@ -0,0 +1,22 @@
+[package]
+name = "csv-sanity"
+version = "0.1.0"
+authors = ["M. George Hansen <technopolitica@gmail.com>"]
+license = "MPL-2.0"
+maintenance = { status = "passively-maintained" }
+
+[dependencies]
+csv = "0.15.0"
+clap = "2.23.3"
+log = "0.3.7"
+regex = "0.2.1"
+lazy_static = "0.2.8"
+unicode-segmentation = "1.1.0"
+time = "0.1.37"
+maplit = "0.1.4"
+serde = "1.0"
+serde_derive = "1.0"
+serde_json = "1.0"
+custom_derive = "0.1.7"
+newtype_derive = "0.1.6"
+rustc-serialize = "0.3"
--- a/373
+++ b/373
@ -0,0 +1,373 @@
+Mozilla Public License Version 2.0
+==================================
+
+1. Definitions
+--------------
+
+1.1. "Contributor"
+    means each individual or legal entity that creates, contributes to
+    the creation of, or owns Covered Software.
+
+1.2. "Contributor Version"
+    means the combination of the Contributions of others (if any) used
+    by a Contributor and that particular Contributor's Contribution.
+
+1.3. "Contribution"
+    means Covered Software of a particular Contributor.
+
+1.4. "Covered Software"
+    means Source Code Form to which the initial Contributor has attached
+    the notice in Exhibit A, the Executable Form of such Source Code
+    Form, and Modifications of such Source Code Form, in each case
+    including portions thereof.
+
+1.5. "Incompatible With Secondary Licenses"
+    means
+
+    (a) that the initial Contributor has attached the notice described
+        in Exhibit B to the Covered Software; or
+
+    (b) that the Covered Software was made available under the terms of
+        version 1.1 or earlier of the License, but not also under the
+        terms of a Secondary License.
+
+1.6. "Executable Form"
+    means any form of the work other than Source Code Form.
+
+1.7. "Larger Work"
+    means a work that combines Covered Software with other material, in
+    a separate file or files, that is not Covered Software.
+
+1.8. "License"
+    means this document.
+
+1.9. "Licensable"
+    means having the right to grant, to the maximum extent possible,
+    whether at the time of the initial grant or subsequently, any and
+    all of the rights conveyed by this License.
+
+1.10. "Modifications"
+    means any of the following:
+
+    (a) any file in Source Code Form that results from an addition to,
+        deletion from, or modification of the contents of Covered
+        Software; or
+
+    (b) any new file in Source Code Form that contains any Covered
+        Software.
+
+1.11. "Patent Claims" of a Contributor
+    means any patent claim(s), including without limitation, method,
+    process, and apparatus claims, in any patent Licensable by such
+    Contributor that would be infringed, but for the grant of the
+    License, by the making, using, selling, offering for sale, having
+    made, import, or transfer of either its Contributions or its
+    Contributor Version.
+
+1.12. "Secondary License"
+    means either the GNU General Public License, Version 2.0, the GNU
+    Lesser General Public License, Version 2.1, the GNU Affero General
+    Public License, Version 3.0, or any later versions of those
+    licenses.
+
+1.13. "Source Code Form"
+    means the form of the work preferred for making modifications.
+
+1.14. "You" (or "Your")
+    means an individual or a legal entity exercising rights under this
+    License. For legal entities, "You" includes any entity that
+    controls, is controlled by, or is under common control with You. For
+    purposes of this definition, "control" means (a) the power, direct
+    or indirect, to cause the direction or management of such entity,
+    whether by contract or otherwise, or (b) ownership of more than
+    fifty percent (50%) of the outstanding shares or beneficial
+    ownership of such entity.
+
+2. License Grants and Conditions
+--------------------------------
+
+2.1. Grants
+
+Each Contributor hereby grants You a world-wide, royalty-free,
+non-exclusive license:
+
+(a) under intellectual property rights (other than patent or trademark)
+    Licensable by such Contributor to use, reproduce, make available,
+    modify, display, perform, distribute, and otherwise exploit its
+    Contributions, either on an unmodified basis, with Modifications, or
+    as part of a Larger Work; and
+
+(b) under Patent Claims of such Contributor to make, use, sell, offer
+    for sale, have made, import, and otherwise transfer either its
+    Contributions or its Contributor Version.
+
+2.2. Effective Date
+
+The licenses granted in Section 2.1 with respect to any Contribution
+become effective for each Contribution on the date the Contributor first
+distributes such Contribution.
+
+2.3. Limitations on Grant Scope
+
+The licenses granted in this Section 2 are the only rights granted under
+this License. No additional rights or licenses will be implied from the
+distribution or licensing of Covered Software under this License.
+Notwithstanding Section 2.1(b) above, no patent license is granted by a
+Contributor:
+
+(a) for any code that a Contributor has removed from Covered Software;
+    or
+
+(b) for infringements caused by: (i) Your and any other third party's
+    modifications of Covered Software, or (ii) the combination of its
+    Contributions with other software (except as part of its Contributor
+    Version); or
+
+(c) under Patent Claims infringed by Covered Software in the absence of
+    its Contributions.
+
+This License does not grant any rights in the trademarks, service marks,
+or logos of any Contributor (except as may be necessary to comply with
+the notice requirements in Section 3.4).
+
+2.4. Subsequent Licenses
+
+No Contributor makes additional grants as a result of Your choice to
+distribute the Covered Software under a subsequent version of this
+License (see Section 10.2) or under the terms of a Secondary License (if
+permitted under the terms of Section 3.3).
+
+2.5. Representation
+
+Each Contributor represents that the Contributor believes its
+Contributions are its original creation(s) or it has sufficient rights
+to grant the rights to its Contributions conveyed by this License.
+
+2.6. Fair Use
+
+This License is not intended to limit any rights You have under
+applicable copyright doctrines of fair use, fair dealing, or other
+equivalents.
+
+2.7. Conditions
+
+Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
+in Section 2.1.
+
+3. Responsibilities
+-------------------
+
+3.1. Distribution of Source Form
+
+All distribution of Covered Software in Source Code Form, including any
+Modifications that You create or to which You contribute, must be under
+the terms of this License. You must inform recipients that the Source
+Code Form of the Covered Software is governed by the terms of this
+License, and how they can obtain a copy of this License. You may not
+attempt to alter or restrict the recipients' rights in the Source Code
+Form.
+
+3.2. Distribution of Executable Form
+
+If You distribute Covered Software in Executable Form then:
+
+(a) such Covered Software must also be made available in Source Code
+    Form, as described in Section 3.1, and You must inform recipients of
+    the Executable Form how they can obtain a copy of such Source Code
+    Form by reasonable means in a timely manner, at a charge no more
+    than the cost of distribution to the recipient; and
+
+(b) You may distribute such Executable Form under the terms of this
+    License, or sublicense it under different terms, provided that the
+    license for the Executable Form does not attempt to limit or alter
+    the recipients' rights in the Source Code Form under this License.
+
+3.3. Distribution of a Larger Work
+
+You may create and distribute a Larger Work under terms of Your choice,
+provided that You also comply with the requirements of this License for
+the Covered Software. If the Larger Work is a combination of Covered
+Software with a work governed by one or more Secondary Licenses, and the
+Covered Software is not Incompatible With Secondary Licenses, this
+License permits You to additionally distribute such Covered Software
+under the terms of such Secondary License(s), so that the recipient of
+the Larger Work may, at their option, further distribute the Covered
+Software under the terms of either this License or such Secondary
+License(s).
+
+3.4. Notices
+
+You may not remove or alter the substance of any license notices
+(including copyright notices, patent notices, disclaimers of warranty,
+or limitations of liability) contained within the Source Code Form of
+the Covered Software, except that You may alter any license notices to
+the extent required to remedy known factual inaccuracies.
+
+3.5. Application of Additional Terms
+
+You may choose to offer, and to charge a fee for, warranty, support,
+indemnity or liability obligations to one or more recipients of Covered
+Software. However, You may do so only on Your own behalf, and not on
+behalf of any Contributor. You must make it absolutely clear that any
+such warranty, support, indemnity, or liability obligation is offered by
+You alone, and You hereby agree to indemnify every Contributor for any
+liability incurred by such Contributor as a result of warranty, support,
+indemnity or liability terms You offer. You may include additional
+disclaimers of warranty and limitations of liability specific to any
+jurisdiction.
+
+4. Inability to Comply Due to Statute or Regulation
+---------------------------------------------------
+
+If it is impossible for You to comply with any of the terms of this
+License with respect to some or all of the Covered Software due to
+statute, judicial order, or regulation then You must: (a) comply with
+the terms of this License to the maximum extent possible; and (b)
+describe the limitations and the code they affect. Such description must
+be placed in a text file included with all distributions of the Covered
+Software under this License. Except to the extent prohibited by statute
+or regulation, such description must be sufficiently detailed for a
+recipient of ordinary skill to be able to understand it.
+
+5. Termination
+--------------
+
+5.1. The rights granted under this License will terminate automatically
+if You fail to comply with any of its terms. However, if You become
+compliant, then the rights granted under this License from a particular
+Contributor are reinstated (a) provisionally, unless and until such
+Contributor explicitly and finally terminates Your grants, and (b) on an
+ongoing basis, if such Contributor fails to notify You of the
+non-compliance by some reasonable means prior to 60 days after You have
+come back into compliance. Moreover, Your grants from a particular
+Contributor are reinstated on an ongoing basis if such Contributor
+notifies You of the non-compliance by some reasonable means, this is the
+first time You have received notice of non-compliance with this License
+from such Contributor, and You become compliant prior to 30 days after
+Your receipt of the notice.
+
+5.2. If You initiate litigation against any entity by asserting a patent
+infringement claim (excluding declaratory judgment actions,
+counter-claims, and cross-claims) alleging that a Contributor Version
+directly or indirectly infringes any patent, then the rights granted to
+You by any and all Contributors for the Covered Software under Section
+2.1 of this License shall terminate.
+
+5.3. In the event of termination under Sections 5.1 or 5.2 above, all
+end user license agreements (excluding distributors and resellers) which
+have been validly granted by You or Your distributors under this License
+prior to termination shall survive termination.
+
+************************************************************************
+*                                                                      *
+*  6. Disclaimer of Warranty                                           *
+*  -------------------------                                           *
+*                                                                      *
+*  Covered Software is provided under this License on an "as is"       *
+*  basis, without warranty of any kind, either expressed, implied, or  *
+*  statutory, including, without limitation, warranties that the       *
+*  Covered Software is free of defects, merchantable, fit for a        *
+*  particular purpose or non-infringing. The entire risk as to the     *
+*  quality and performance of the Covered Software is with You.        *
+*  Should any Covered Software prove defective in any respect, You     *
+*  (not any Contributor) assume the cost of any necessary servicing,   *
+*  repair, or correction. This disclaimer of warranty constitutes an   *
+*  essential part of this License. No use of any Covered Software is   *
+*  authorized under this License except under this disclaimer.         *
+*                                                                      *
+************************************************************************
+
+************************************************************************
+*                                                                      *
+*  7. Limitation of Liability                                          *
+*  --------------------------                                          *
+*                                                                      *
+*  Under no circumstances and under no legal theory, whether tort      *
+*  (including negligence), contract, or otherwise, shall any           *
+*  Contributor, or anyone who distributes Covered Software as          *
+*  permitted above, be liable to You for any direct, indirect,         *
+*  special, incidental, or consequential damages of any character      *
+*  including, without limitation, damages for lost profits, loss of    *
+*  goodwill, work stoppage, computer failure or malfunction, or any    *
+*  and all other commercial damages or losses, even if such party      *
+*  shall have been informed of the possibility of such damages. This   *
+*  limitation of liability shall not apply to liability for death or   *
+*  personal injury resulting from such party's negligence to the       *
+*  extent applicable law prohibits such limitation. Some               *
+*  jurisdictions do not allow the exclusion or limitation of           *
+*  incidental or consequential damages, so this exclusion and          *
+*  limitation may not apply to You.                                    *
+*                                                                      *
+************************************************************************
+
+8. Litigation
+-------------
+
+Any litigation relating to this License may be brought only in the
+courts of a jurisdiction where the defendant maintains its principal
+place of business and such litigation shall be governed by laws of that
+jurisdiction, without reference to its conflict-of-law provisions.
+Nothing in this Section shall prevent a party's ability to bring
+cross-claims or counter-claims.
+
+9. Miscellaneous
+----------------
+
+This License represents the complete agreement concerning the subject
+matter hereof. If any provision of this License is held to be
+unenforceable, such provision shall be reformed only to the extent
+necessary to make it enforceable. Any law or regulation which provides
+that the language of a contract shall be construed against the drafter
+shall not be used to construe this License against a Contributor.
+
+10. Versions of the License
+---------------------------
+
+10.1. New Versions
+
+Mozilla Foundation is the license steward. Except as provided in Section
+10.3, no one other than the license steward has the right to modify or
+publish new versions of this License. Each version will be given a
+distinguishing version number.
+
+10.2. Effect of New Versions
+
+You may distribute the Covered Software under the terms of the version
+of the License under which You originally received the Covered Software,
+or under the terms of any subsequent version published by the license
+steward.
+
+10.3. Modified Versions
+
+If you create software not governed by this License, and you want to
+create a new license for such software, you may create and use a
+modified version of this License if you rename the license and remove
+any references to the name of the license steward (except to note that
+such modified license differs from this License).
+
+10.4. Distributing Source Code Form that is Incompatible With Secondary
+Licenses
+
+If You choose to distribute Source Code Form that is Incompatible With
+Secondary Licenses under the terms of this version of the License, the
+notice described in Exhibit B of this License must be attached.
+
+Exhibit A - Source Code Form License Notice
+-------------------------------------------
+
+  This Source Code Form is subject to the terms of the Mozilla Public
+  License, v. 2.0. If a copy of the MPL was not distributed with this
+  file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+If it is not possible or desirable to put the notice in a particular
+file, then You may include the notice in a location (such as a LICENSE
+file in a relevant directory) where a recipient would be likely to look
+for such a notice.
+
+You may add additional accurate notices of copyright ownership.
+
+Exhibit B - "Incompatible With Secondary Licenses" Notice
+---------------------------------------------------------
+
+  This Source Code Form is "Incompatible With Secondary Licenses", as
+  defined by the Mozilla Public License, v. 2.0.
--- a/README.md
+++ b/README.md
@ -0,0 +1,268 @@
+# csv-sanity
+
+Preserve your sanity in a world full of malformed, poorly validated CSV files.
+Sanitize and transform large CSVs with millions of records quickly and
+efficiently.
+
+**NOTE:** csv-sanity is in an alpha state and is subject to breaking changes.
+The ruleset file syntax in particular is likely to change in the near future.
+I've personally used csv-sanity on a number of projects and it has been
+incredibly helpful, but as with most alpha software csv-sanity is provided
+as-is and provides no warranty or guarantee. Use at your own risk and double
+check your transformed files!
+
+## Purpose
+
+The CSV format is not well-standardized and has many shortfalls when it comes to
+storing large numbers of records with complex data formats, but CSVs are
+ubiquitous in many realms as a neutral interchange format that most CRMs and
+database software can parse and understand.
+
+But what happens when your CRM can only parse ISO 8601 formatted dates and the
+CSV you inherited has dates in another format such as the following:
+
+```csv
+id,name,signup_date
+2,John Doe,11/22/2017
+3,Jane Doe,11/28/2017
+```
+
+Or you received a CSV of people who you need to contact via a personalized
+email, but your contacts' names in the CSV are in ALL CAPS:
+
+```csv
+id,first_name,last_name
+2,JOHN,DOE
+3,JANE,DOE
+```
+
+Or you have a CSV that has valid values for the vast majority of records, but 1
+out of every 20k records has nonsense values that cause your entire import to
+abort:
+
+```csv
+id,first_name,last_name,party_registration
+2,Jane,Doe,REP
+3,John,Doe,DEM
+345,Josh,Smith,HAHAHAHA
+```
+
+Or even a CSV that has a few malformed records due to unescaped commas:
+
+```csv
+id,first_name,last_name,email
+2,Jane,Doe,jane@example.com
+3,John,Doe,"i,don't,follow,the,rules"@example.com
+```
+
+These are all real problems I've encountered with CSVs over the years. If the
+CSV is small enough they can be corrected by hand, but for CSVs with 10k, 100k
+or even millions of records correcting by hand simply isn't a viable option.
+
+`csv-sanity` aims to solve the issue of sanitizing large, poorly-validated CSVs.
+
+## Usage
+
+`csv-sanity` is an executable that takes an input CSV to process and a JSON
+ruleset file defining the transformation rules to apply:
+
+```bash
+csv-sanity [-r RULESET_FILE] <INPUT_FILE>
+```
+
+If a path to a ruleset file is not provided via the `-r` option, `csv-sanity`
+will look for a file named "ruleset.json" in the current directory.
+
+By default, `csv-sanity` outputs two files to the current directory:
+output.csv, which contains the processed CSV with validated and transformed
+records, and errors.csv, which contains a list of records and fields that
+couldn't be processed and reasons they were rejected. The paths where the output
+and error files are output can be overridden via the `-o FILE_PATH` and
+`-e FILE_PATH` options, respectively.
+
+## ruleset.json Syntax
+
+Ruleset files are JSON files that define a collection of transformation rules
+and the fields to which they should be applied.
+
+The following is an example ruleset JSON file:
+
+```json
+{
+    "rules": [
+        {
+            "applicability": {
+                "Global": []
+            },
+            "transformer": {
+                "None": {
+                    "regex": "\\A(?:[:cntrl:]|\\s)*\\z"
+                }
+            },
+            "priority": -10
+        },
+        {
+            "applicability": {
+                "Global": []
+            },
+            "transformer": {
+                "Trim": {}
+            },
+            "priority": -10
+        },
+        {
+            "applicability": {
+                "Fields": {
+                    "field_names": [
+                        "first_name",
+                        "last_name"
+                    ]
+                }
+            },
+            "transformer": {
+                "Capitalize": {}
+            }
+        }
+    ]
+}
+```
+
+Every ruleset.json file is a JSON object with a single "rules" field with an
+array of rule objects.
+
+Rules are objects with two fields:
+
+- **"applicability"**: specifies whether a rule applies globally or only to a
+    predefined set of fields (specified as the column headers in the CSV being
+    processed)
+- **"transformer"**: a transformer object, which specifies how the applicable
+    fields should be transformed.
+
+### Transformers
+
+
+
+#### Capitalize
+
+Transforms string fields into Capital Case.
+
+#### Choice
+
+Only accepts a pre-defined list of acceptable values and rejects the rest.
+
+#### Date
+
+```json
+{
+    "Date": {
+        "input_formats": [
+            "%m/%d/%Y"
+        ],
+        "output_formats": "%F"
+    }
+}
+```
+
+Attempt to parse fields with a list of datetime formats via
+[time::strptime](https://docs.rs/time/0.1.37/time/fn.strptime.html). See the
+docs for the [time](https://docs.rs/time/0.1.37/time/index.html) crate for
+details on datetime formatting syntax.
+
+#### Email
+
+```json
+{
+    "Email": {}
+}
+```
+
+Attempt to parse fields as email addresses, rejecting any fields that appear to
+be invalid email addresses.
+
+#### None
+
+```json
+{
+    "None": {
+        "regex": "\\A(?:[:cntrl:]|\\s)*\\z"
+    }
+}
+```
+
+Replace matched fields with a blank value. Useful as a global rule for
+normalizing blank fields in a CSV file.
+
+#### Number
+
+```json
+{
+    "Number": {}
+}
+```
+
+Attempt to parse fields as whole integers, rejecting any fields that cannot be
+parsed.
+
+#### PhoneNumber
+
+```json
+{
+    "PhoneNumber": {}
+}
+```
+
+Attempt to parse fields as US, NANP-formatted phone numbers, transforming them
+into a standard international format of `+1 <area_code> <exchange_code> <subscriber_number>`.
+
+#### Regex
+
+```json
+{
+    "Regex": {
+        "regex": "\\A([A-Z])[A-Z]+\\z",
+        "template": "$1"
+    }
+}
+```
+
+Match fields against the provided regex pattern and transform them according to
+the template string, replacing capture group placeholders. See
+[Regex::replace](https://docs.rs/regex/0.2.1/regex/struct.Regex.html#method.replace)
+in the regex crate docs for details.
+
+#### RegexMatch
+
+```json
+{
+    "RegexMatch": {
+        "regex": "\\A[A-Z]{2,3}\\z",
+        "negate": false
+    }
+}
+```
+
+Reject any fields that fail to match against the provided regex pattern. If
+`negate` is `true`, then reject any fields that match the provided regex pattern
+instead.
+
+#### Trim
+
+```json
+{
+    "Trim": {}
+}
+```
+
+Trim leading and trailing whitespace from fields. Useful as a global rule to
+normalize fields and remove useless whitespace.
+
+#### Zipcode
+
+```json
+{
+    "Zipcode": {}
+}
+```
+
+Attempt to parse fields as US zip codes in the formats "xxxxx" and "xxxxx-xxxx",
+rejecting any fields that fail to match that format.
--- a/src/cli.rs
+++ b/src/cli.rs
@ -0,0 +1,193 @@
+//! Command line interface.
+
+use std::fs::File;
+use std::path::Path;
+
+use {
+    Ruleset,
+    TransformError,
+    TransformedRecord,
+};
+
+use csv;
+
+/// Top-level settings accepted by `Cli::new_with_options`.
+pub struct Options {
+    /// Settings that govern how the input CSV file is parsed; see `CsvOptions`.
+    pub csv_options: CsvOptions,
+}
+
+impl Default for Options {
+    /// Build an `Options` whose `csv_options` are the `CsvOptions` defaults.
+    fn default() -> Self {
+        let csv_options = CsvOptions::default();
+        Options {
+            csv_options: csv_options,
+        }
+    }
+}
+
+/// `Cli` configuration options specific to how to parse the CSV file.
+///
+/// `CsvOptions` implements `Default`. The example below checks each default
+/// field by field, because `CsvOptions` does not implement `PartialEq` or
+/// `Debug` and therefore cannot be compared with `assert_eq!` as a whole:
+///
+/// ```
+/// extern crate csv;
+/// use csv_sanity::cli::CsvOptions;
+/// use csv::RecordTerminator;
+///
+/// let defaults = CsvOptions::default();
+/// assert_eq!(defaults.delimiter, b',');
+/// assert!(match defaults.record_terminator {
+///     RecordTerminator::CRLF => true,
+///     _ => false,
+/// });
+/// assert_eq!(defaults.quote, b'"');
+/// assert_eq!(defaults.escape, None);
+/// assert!(defaults.double_quote);
+/// ```
+pub struct CsvOptions
+{
+    /// Field delimiter to expect in the CSV file.
+    ///
+    /// Corresponds to the `csv::Reader.delimiter` method.
+    pub delimiter: u8,
+    /// Record terminator to expect in the CSV file.
+    ///
+    /// Corresponds to the `csv::Reader.record_terminator` method. See `csv::RecordTerminator`.
+    pub record_terminator: csv::RecordTerminator,
+    /// Field quotation character to expect in the CSV file.
+    ///
+    /// Corresponds to the `csv::Reader.quote` method.
+    pub quote: u8,
+    /// Escape character to expect in the CSV file.
+    ///
+    /// Corresponds to the `csv::Reader.escape` method.
+    pub escape: Option<u8>,
+    /// Whether two adjacent quote characters should be interpreted as an escaped quote character.
+    ///
+    /// Corresponds to the `csv::Reader.double_quote` method.
+    pub double_quote: bool
+}
+
+impl Default for CsvOptions {
+    /// Comma-delimited, CRLF-terminated records quoted with `"`, with no
+    /// escape character and doubled quotes treated as an escaped quote.
+    fn default() -> Self {
+        let terminator = csv::RecordTerminator::CRLF;
+        let no_escape: Option<u8> = None;
+        CsvOptions {
+            delimiter: b',',
+            record_terminator: terminator,
+            quote: b'"',
+            escape: no_escape,
+            double_quote: true,
+        }
+    }
+}
+
+/// Applies a `Ruleset` to the records of a CSV file on behalf of the command line tool.
+pub struct Cli {
+    /// Settings used when opening and parsing the input file.
+    options: Options,
+    /// Rules applied to every record that is read successfully.
+    ruleset: Ruleset,
+}
+
+impl Cli
+{
+    /// Construct a new `Cli` with default options.
+    ///
+    /// ```
+    /// use csv_sanity::Ruleset;
+    /// use csv_sanity::cli::{
+    ///     Cli
+    /// };
+    ///
+    /// let ruleset = Ruleset::new();
+    /// let cli = Cli::new(ruleset);
+    /// ```
+    pub fn new(ruleset: Ruleset) -> Cli {
+        Self::new_with_options(ruleset, Default::default())
+    }
+
+    /// Construct a new `Cli` with the specified options.
+    ///
+    /// ```
+    /// use csv_sanity::Ruleset;
+    /// use csv_sanity::cli::{
+    ///     Cli,
+    ///     Options,
+    ///     CsvOptions
+    /// };
+    ///
+    /// let ruleset = Ruleset::new();
+    /// let cli = Cli::new_with_options(ruleset, Options {
+    ///     csv_options: CsvOptions {
+    ///         delimiter: b',',
+    ///         .. Default::default()
+    ///     },
+    ///     .. Default::default()
+    /// });
+    /// ```
+    pub fn new_with_options(ruleset: Ruleset, options: Options) -> Cli {
+        Cli {
+            options: options,
+            ruleset: ruleset,
+        }
+    }
+
+    /// Apply the ruleset to every record of the CSV file at `input_file_path`.
+    ///
+    /// Two CSV files are written:
+    ///
+    /// - `output_file_name` receives the input headers prefixed with a
+    ///   "Record Number" column, followed by one row per record that could be
+    ///   read, holding the record number and the transformed field values.
+    /// - `error_file_name` receives one row per problem with the columns
+    ///   "Record Number", "Field Name", "Field Value" and "Reason". A record
+    ///   that cannot be read at all is reported here with empty field name
+    ///   and value and is left out of the output file.
+    ///
+    /// Record numbers start at 2 for the first record after the header row.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the input file or its headers cannot be read, or if either
+    /// output file cannot be opened, written to or flushed.
+    pub fn run<I: AsRef<Path>, O: AsRef<Path>, E: AsRef<Path>>(&self, input_file_path: I, output_file_name: O, error_file_name: E) {
+        let (mut reader, headers) = self.reader_from_file(input_file_path);
+
+        let mut output_writer = csv::Writer::from_file(output_file_name).expect("Unable to open output file for writing");
+        let mut output_headers = headers.clone();
+        output_headers.insert(0, "Record Number".to_string());
+        output_writer.encode(output_headers).expect("Unable to write to output file");
+
+        let mut error_writer = csv::Writer::from_file(error_file_name).expect("Unable to open error file for writing");
+        let error_headers = vec![
+            "Record Number",
+            "Field Name",
+            "Field Value",
+            "Reason",
+        ];
+        error_writer.encode(error_headers).expect("Unable to write to error file");
+
+        for (record_n, record) in reader.records().enumerate() {
+            let original_line_n = record_n + 2; // Plus one for headers and plus one for zero-indexing.
+            let transformed_record: TransformedRecord = match record {
+                Err(e) => {
+                    // The record itself could not be parsed, so there is no
+                    // field to blame: report it and move on to the next one.
+                    let err = TransformError {
+                        field_value: "".to_string(),
+                        field_name: "".to_string(),
+                        record_n: original_line_n,
+                        reason: e.to_string(),
+                    };
+                    error_writer.encode(err).expect("Unable to write to error file");
+                    continue;
+                },
+                Ok(ref rec) => self.ruleset.apply_rules(&headers, rec, original_line_n)
+            };
+            let record_fields: Vec<Option<String>> = {
+                let mut fs = vec![Some(original_line_n.to_string())];
+                fs.extend(transformed_record.field_values);
+                fs
+            };
+            output_writer.encode(record_fields).expect("Unable to write to output file");
+            for error in transformed_record.errors {
+                error_writer.encode(error).expect("Unable to write to error file");
+            }
+        }
+
+        // Flush explicitly so that a failure while writing the last buffered
+        // rows is reported instead of being silently lost when the writers
+        // are dropped.
+        output_writer.flush().expect("Unable to write to output file");
+        error_writer.flush().expect("Unable to write to error file");
+    }
+
+    /// Open `path` as a CSV reader configured from `self.options.csv_options`
+    /// and return it together with the header row.
+    ///
+    /// The reader expects a header row and is flexible, i.e. records may have
+    /// differing numbers of fields.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the file cannot be opened or its headers cannot be read.
+    fn reader_from_file<P: AsRef<Path>>(&self, path: P) -> (csv::Reader<File>, Vec<String>) {
+        let path = path.as_ref();
+        let csv_options = &self.options.csv_options;
+        let mut reader = csv::Reader::from_file(path).map(|r| {
+            // Configure the reader according to the options passed to the Cli constructor.
+            r.has_headers(true)
+                .delimiter(csv_options.delimiter)
+                .record_terminator(csv_options.record_terminator)
+                .quote(csv_options.quote)
+                .escape(csv_options.escape)
+                .double_quote(csv_options.double_quote)
+                .flexible(true)
+        }).unwrap_or_else(|e| panic!("Unable to read file {}: {:?}", path.display(), e));
+        // The panic messages are only formatted on failure and keep the
+        // "<message>: <error>" shape that `expect` produces.
+        let headers = reader.headers()
+            .unwrap_or_else(|e| panic!("Unable to read headers from input file {}: {:?}", path.display(), e));
+        (reader, headers)
+    }
+}
--- a/src/lib.rs
+++ b/src/lib.rs
@ -0,0 +1,36 @@
+#[macro_use]
+extern crate serde_derive;
+extern crate serde;
+extern crate serde_json;
+extern crate regex;
+#[macro_use]
+extern crate lazy_static;
+extern crate unicode_segmentation; 
+extern crate time;
+extern crate csv;
+#[macro_use]
+extern crate custom_derive;
+#[macro_use]
+extern crate newtype_derive;
+extern crate rustc_serialize;
+
+mod newtypes;
+
+pub mod transformer;
+pub use transformer::{
+    Transformer,
+    TransformResult,
+    TransformResultHelper,
+    TransformError
+};
+
+pub mod transformers;
+
+mod ruleset;
+pub use ruleset::{
+    Rule,
+    Ruleset,
+    TransformedRecord,
+};
+
+pub mod cli;
--- a/src/main.rs
+++ b/src/main.rs
@ -0,0 +1,111 @@
+extern crate csv_sanity;
+
+extern crate serde_json;
+#[macro_use]
+extern crate log;
+extern crate regex;
+#[macro_use]
+extern crate clap;
+
+use csv_sanity::cli::{
+    self,
+    Cli,
+};
+
+use std::fs::File;
+use std::path::Path;
+use log::{
+    LogRecord,
+    LogLevel,
+    LogMetadata,
+    LogLevelFilter,
+    SetLoggerError
+};
+use clap::{
+    App,
+    Arg
+};
+
+/// Logger that prints enabled records to standard output.
+struct ConsoleLogger {
+    // Records are enabled when their level compares `<=` to this level.
+    log_level: LogLevel
+}
+
+impl log::Log for ConsoleLogger {
+    /// A record is enabled when its level compares `<=` to the configured `log_level`.
+    fn enabled(&self, metadata: &LogMetadata) -> bool {
+        let level = metadata.level();
+        level <= self.log_level
+    }
+
+    /// Print an enabled record to standard output as `LEVEL - message`.
+    fn log(&self, record: &LogRecord) {
+        if !self.enabled(record.metadata()) {
+            return;
+        }
+        println!("{} - {}", record.level(), record.args())
+    }
+}
+
+/// Install a `ConsoleLogger` as the global logger with both the maximum level filter and the
+/// logger's own level set to `Info`.
+fn init_logging() -> Result<(), SetLoggerError> {
+    log::set_logger(|level_filter| {
+        let logger = ConsoleLogger { log_level: LogLevel::Info };
+        level_filter.set(LogLevelFilter::Info);
+        Box::new(logger)
+    })
+}
+
+/// Entry point: parse the command line, load the JSON ruleset, and run the CLI over the input
+/// file, writing transformed records and errors to their respective output files.
+///
+/// Exits with status 1 if the ruleset file cannot be opened or parsed.
+fn main() {
+    init_logging().unwrap();
+
+    let matches = App::new("Convert CSV")
+        .version(crate_version!())
+        .author("M. George Hansen <technopolitica@gmail.com>")
+        .about("Apply a set of transformations to the records in a CSV file, attempting to read a much valid information from the file as possible.")
+        .arg(Arg::with_name("INPUT_FILE")
+            .help("CSV file to process")
+            .required(true)
+            .index(1))
+        .arg(Arg::with_name("output")
+            .help("File to output the transformed CSV records. Defaults to ./output.csv")
+            .short("o")
+            .long("output")
+            .takes_value(true))
+        .arg(Arg::with_name("error_output")
+            .help("File to output errors in CSV format. Defaults to ./errors.csv")
+            .short("e")
+            .long("error_output")
+            .takes_value(true))
+        .arg(Arg::with_name("ruleset")
+            .help("JSON file containing the ruleset to apply. Defaults to ./ruleset.json")
+            .short("r")
+            .long("ruleset")
+            .takes_value(true))
+        .get_matches();
+
+    let ruleset_file_path = Path::new(matches.value_of("ruleset").unwrap_or("ruleset.json"));
+    let ruleset_file = match File::open(ruleset_file_path) {
+        Ok(f) => f,
+        Err(e) => exit_with_error(&format!("unable to read ruleset file {}: {}", ruleset_file_path.display(), e))
+    };
+    let ruleset = match serde_json::from_reader(ruleset_file) {
+        Ok(r) => r,
+        Err(e) => {
+            exit_with_error(&format!("failed to parse ruleset from {}: {}", ruleset_file_path.display(), e));
+        }
+    };
+
+    let cli_app = Cli::new_with_options(ruleset, cli::Options {
+        csv_options: cli::CsvOptions {
+            delimiter: b'\t',
+            .. Default::default()
+        },
+        .. Default::default()
+    });
+
+    // NOTE: Required arguments are validated by clap, so we should be safe to use expect here.
+    let input_file_name = matches.value_of("INPUT_FILE").expect("INPUT_FILE argument could not be found!");
+    // The lookup keys must match the names passed to `Arg::with_name` above; a mismatched key
+    // makes `value_of` return `None`, which would silently discard the user's -o / -e values.
+    let output_file_name = matches.value_of("output").unwrap_or("output.csv");
+    let error_file_name = matches.value_of("error_output").unwrap_or("errors.csv");
+    cli_app.run(input_file_name, output_file_name, error_file_name);
+}
+
+fn exit_with_error(error_msg: &str) -> !
+{
+    error!("{}", error_msg);
+    std::process::exit(1);
+}
--- a/src/newtypes.rs
+++ b/src/newtypes.rs
@ -0,0 +1,59 @@
+use std::hash::{
+    Hash,
+    Hasher,
+};
+use regex;
+use serde::{
+    Serialize,
+    Serializer,
+    Deserialize,
+    Deserializer,
+};
+
+// Newtype wrapper around `regex::Regex`. The hand-written trait impls in this module compare,
+// hash and (de)serialize the wrapper through the regex's pattern string.
+custom_derive! {
+    #[derive(NewtypeFrom, NewtypeDeref, NewtypeDerefMut, Clone, NewtypeDisplay, NewtypeDebug)]
+    pub struct Regex(regex::Regex);
+}
+
+impl PartialEq for Regex {
+    /// Two wrappers are equal when their pattern strings are equal.
+    fn eq(&self, other: &Regex) -> bool {
+        let Regex(ref lhs) = *self;
+        let Regex(ref rhs) = *other;
+        lhs.as_str() == rhs.as_str()
+    }
+}
+
+// Equality is plain string equality on the pattern, so it is a full equivalence relation.
+impl Eq for Regex {}
+
+impl Hash for Regex {
+    /// Hash the pattern string, matching the string-based `PartialEq` impl.
+    fn hash<H>(&self, state: &mut H)
+        where H: Hasher {
+        let Regex(ref inner) = *self;
+        inner.as_str().hash(state);
+    }
+}
+
+impl Serialize for Regex
+{
+    /// Serialize the regex as its pattern string.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+      where S: Serializer {
+        let pattern: &str = self.0.as_str();
+        serializer.serialize_str(pattern)
+    }
+}
+
+impl<'de> Deserialize<'de> for Regex
+{
+    /// Deserialize a string and compile it as a regex.
+    ///
+    /// A string that fails to compile is reported as an `invalid_value` error carrying the
+    /// offending string and the compiler's message.
+    fn deserialize<D>(deserializer: D) -> Result<Regex, D::Error>
+      where D: Deserializer<'de>
+    {
+        use serde::de::{Unexpected, Error};
+        let pattern: String = Deserialize::deserialize(deserializer)?;
+        match regex::Regex::new(&pattern) {
+            Ok(compiled) => Ok(Regex(compiled)),
+            Err(e) => {
+                let message: &str = &format!("invalid regex string: {}", e);
+                Err(D::Error::invalid_value(Unexpected::Str(&pattern), &message))
+            }
+        }
+    }
+}
--- a/src/ruleset.rs
+++ b/src/ruleset.rs
@ -0,0 +1,345 @@
+use Transformer;
+use transformer::{
+    TransformResult,
+    TransformError,
+};
+use transformers::{
+    Transformers,
+    TrimTransformer,
+    NoneTransformer,
+};
+
+use std::hash::{
+    Hash,
+    Hasher,
+};
+use std::iter::FromIterator;
+use std::cmp::Ordering;
+use std::collections::{
+    BinaryHeap,
+    HashSet,
+};
+use std::error;
+use std::fmt::{
+    self,
+    Formatter,
+    Display,
+};
+
+/// Applicability of a `Rule`, determining which of a CSV record's fields it can be applied to.
+///
+/// `Rule::apply` runs the rule's transformer for every field when `Global`, and only for fields
+/// whose name is in `field_names` when `Fields`.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
+pub enum Applicability {
+    /// Applicable to all CSV record fields.
+    Global,
+    /// Applicable to a subset of a CSV record's fields, specified by field name.
+    Fields {
+        /// Names (as they appear in the CSV headers) of the fields the rule applies to.
+        field_names: HashSet<String>
+    }
+}
+
+impl Hash for Applicability {
+    /// Hash consistently with the derived `PartialEq`: equal values always hash identically.
+    ///
+    /// Each variant first feeds a fixed tag into the hasher. `Fields` then hashes its names in
+    /// sorted order, because `HashSet` iteration order is unspecified and two equal sets may
+    /// yield their elements in different orders.
+    fn hash<H>(&self, state: &mut H)
+        where H: Hasher {
+        match *self {
+            // Hashing the value's address (as was done previously) makes two equal `Global`
+            // values hash differently; a constant tag is sufficient for a field-less variant.
+            Applicability::Global => 0u8.hash(state),
+            Applicability::Fields { ref field_names } => {
+                1u8.hash(state);
+                let mut sorted_names: Vec<&String> = field_names.iter().collect();
+                sorted_names.sort();
+                sorted_names.hash(state);
+            }
+        }
+    }
+}
+
+/// Serde `skip_serializing_if` predicate: true when `priority` is the default value of 0.
+fn priority_is_default(priority: &isize) -> bool {
+    *priority == 0
+}
+
+/// A `Transformer` paired with `Applicability` and a priority which can be applied to fields in a
+/// CSV record.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
+pub struct Rule
+{
+    // Which fields the transformer is run on.
+    applicability: Applicability,
+    // The transformation performed on applicable fields.
+    transformer: Transformers,
+    // Defaults to 0 when absent from the serialized form, and is omitted when serializing a 0.
+    #[serde(default, skip_serializing_if="priority_is_default")]
+    priority: isize
+}
+
+impl Rule
+{
+    /// Build a `Rule` whose `Transformer` applies only to the CSV fields named in `field_names`,
+    /// using the default priority of 0.
+    ///
+    /// # Examples
+    /// ```
+    /// use csv_sanity::Rule;
+    /// use csv_sanity::transformers::*;
+    ///
+    /// let rule = Rule::for_fields(&["First Name", "Last Name"], Transformers::Capitalize(
+    ///     CapitalizeTransformer::new()
+    /// ));
+    /// ```
+    pub fn for_fields(field_names: &[&str], transformer: Transformers) -> Rule {
+        Rule::for_fields_with_priority(field_names, transformer, 0)
+    }
+
+    /// Build a `Rule` whose `Transformer` applies only to the CSV fields named in `field_names`,
+    /// using the given priority.
+    ///
+    /// # Examples
+    /// ```
+    /// use csv_sanity::Rule;
+    /// use csv_sanity::transformers::*;
+    ///
+    /// let rule = Rule::for_fields_with_priority(&["First Name", "Last Name"], Transformers::Capitalize(
+    ///     CapitalizeTransformer::new()
+    /// ), 10);
+    /// ```
+    pub fn for_fields_with_priority(field_names: &[&str], transformer: Transformers, priority: isize) -> Rule {
+        let owned_names: HashSet<String> = field_names.iter()
+            .map(|name| String::from(*name))
+            .collect();
+        Rule {
+            applicability: Applicability::Fields { field_names: owned_names },
+            transformer: transformer,
+            priority: priority
+        }
+    }
+
+    /// Build a `Rule` that applies to every field of a CSV record, using the default priority
+    /// of 0.
+    ///
+    /// # Examples
+    /// ```
+    /// use csv_sanity::Rule;
+    /// use csv_sanity::transformers::*;
+    ///
+    /// let rule = Rule::global(Transformers::Capitalize(
+    ///     CapitalizeTransformer::new()
+    /// ));
+    /// ```
+    pub fn global(transformer: Transformers) -> Rule {
+        Rule::global_with_priority(transformer, 0)
+    }
+
+    /// Build a `Rule` that applies to every field of a CSV record, using the given priority.
+    ///
+    /// # Examples
+    /// ```
+    /// use csv_sanity::Rule;
+    /// use csv_sanity::transformers::*;
+    ///
+    /// let rule = Rule::global_with_priority(Transformers::Capitalize(
+    ///     CapitalizeTransformer::new()
+    /// ), 10);
+    /// ```
+    pub fn global_with_priority(transformer: Transformers, priority: isize) -> Rule {
+        Rule {
+            applicability: Applicability::Global,
+            transformer: transformer,
+            priority: priority
+        }
+    }
+
+    /// Run this rule against one field of a CSV record and return the `TransformResult`.
+    ///
+    /// When the rule is not applicable to `field_name`, the field value is returned unchanged
+    /// as a successful result.
+    ///
+    /// # Examples
+    /// ```
+    /// use csv_sanity::Rule;
+    /// use csv_sanity::transformers::*;
+    ///
+    /// let field = "JOHN";
+    /// let field_name = "First Name";
+    ///
+    /// let rule = Rule::for_fields(&["First Name", "Last Name"], Transformers::Capitalize(
+    ///     CapitalizeTransformer::new()
+    /// ));
+    /// rule.apply(field, field_name, 1);
+    /// ```
+    pub fn apply(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult {
+        // XXX: Does the applicability check belong inside the apply method? Or should the caller
+        //   decide?
+        let applicable = match self.applicability {
+            Applicability::Global => true,
+            Applicability::Fields { ref field_names } => field_names.contains(field_name),
+        };
+        if applicable {
+            self.transformer.transform(field_value, field_name, record_n)
+        } else {
+            Ok(Some(field_value.to_string()))
+        }
+    }
+}
+
+impl Ord for Rule
+{
+    /// Order rules by priority only, reversed: the rule with the numerically lower priority
+    /// compares as the greater one.
+    fn cmp(&self, other: &Self) -> Ordering {
+        let natural = self.priority.cmp(&other.priority);
+        natural.reverse()
+    }
+}
+
+impl PartialOrd for Rule
+{
+    /// Always `Some`; delegates to the total order defined by `Ord::cmp`.
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        let ordering = Ord::cmp(self, other);
+        Some(ordering)
+    }
+}
+
+/// A collection of `Rule`s that are applied together to the fields of CSV records.
+///
+/// # Examples
+/// ```
+/// use csv_sanity::{
+///     Ruleset,
+///     Rule,
+///     TransformedRecord,
+/// };
+/// use csv_sanity::transformers::*;
+/// let ruleset = {
+///     let mut r = Ruleset::new();
+///     r.add_rule(Rule::for_fields(&["First Name", "Last Name"], Transformers::Capitalize(
+///         CapitalizeTransformer::new()
+///     )));
+///     r.add_rule(Rule::for_fields(&["Email"], Transformers::Email(
+///         EmailTransformer::new()
+///     )));
+///     r
+/// };
+/// let headers = vec!["Id", "First Name", "Last Name", "Email"].iter().map(|s| s.to_string()).collect();
+/// let record = vec!["1", " JOHN", "SNOW  ", "\t   JSNOW@EXAMPLE.COM "].iter().map(|s| s.to_string()).collect();
+/// let transformed_record = ruleset.apply_rules(&headers, &record, 1);
+/// assert_eq!(TransformedRecord {
+///     field_values: vec!["1", "John", "Snow", "jsnow@example.com"].iter().map(|s| Some(s.to_string())).collect(),
+///     errors: Vec::new(),
+/// }, transformed_record);
+/// ```
+#[derive(Serialize, Deserialize, Clone, Debug)]
+pub struct Ruleset {
+    // Heap ordered by `Rule`'s `Ord` impl, which compares priorities only.
+    rules: BinaryHeap<Rule>
+}
+
+impl Ruleset {
+    /// Construct a new `Ruleset` with a default `NoneTransformer` and `TrimTransformer` global
+    /// rules.
+    ///
+    /// The default trim and none rules should be appropriate for most CSV files. For CSV files
+    /// where these default rules are not desired use the `Ruleset::without_default_rules` method.
+    pub fn new() -> Ruleset {
+        let mut ruleset = Self::without_default_rules();
+        // Add a default trim rule and blank rule to match empty fields.
+        ruleset.add_rule(Rule::global_with_priority(Transformers::None(NoneTransformer::with_blank_matcher()), -10));
+        ruleset.add_rule(Rule::global_with_priority(Transformers::Trim(TrimTransformer::new()), -10));
+        ruleset
+    }
+
+    /// Construct a new `Ruleset` without any of the default rules.
+    pub fn without_default_rules() -> Ruleset {
+        Ruleset {
+            rules: BinaryHeap::new()
+        }
+    }
+
+    /// Add a `Rule` to this ruleset.
+    pub fn add_rule(&mut self, rule: Rule) {
+        self.rules.push(rule);
+    }
+
+    /// Validate this ruleset against a CSV file by comparing its `Rule`s against the headers.
+    ///
+    /// # Errors
+    /// Returns one `ValidationError` per field-specific rule that names at least one field
+    /// missing from `headers`.
+    pub fn validate_rules(&self, headers: &Vec<String>) -> Result<(), Vec<ValidationError>> {
+        // The header set is the same for every rule, so build it once up front.
+        let header_set = HashSet::<String>::from_iter(headers.iter().cloned());
+        let mut errors = Vec::new();
+        for rule in self.rules.iter() {
+            if let Applicability::Fields { ref field_names } = rule.applicability {
+                let diff: HashSet<String> = field_names.difference(&header_set).cloned().collect();
+                if !diff.is_empty() {
+                    errors.push(
+                        ValidationError {
+                            reason: format!("The following fields were not found in headers: '{:?}'", diff),
+                        }
+                    )
+                }
+            }
+        }
+        if errors.is_empty() {
+            Ok(())
+        } else {
+            Err(errors)
+        }
+    }
+
+    /// Apply this `Ruleset` to a record from a CSV file.
+    ///
+    /// Rules run in ascending order of their numeric priority (so the default rules at -10 run
+    /// before rules with the default priority of 0); the relative order of rules that share a
+    /// priority is unspecified. Each rule receives the output of the previous one. Once a rule
+    /// yields an empty value or fails, the remaining rules are skipped for that field and the
+    /// field is recorded as `None`; failures are additionally collected in `errors`.
+    ///
+    /// Fields beyond the number of headers are not transformed; each is reported as an error
+    /// whose `field_name` is the field's position.
+    pub fn apply_rules(&self, headers: &Vec<String>, fields: &Vec<String>, record_n: usize) -> TransformedRecord {
+        let expected_n_fields = headers.len();
+
+        // `BinaryHeap::iter` visits elements in arbitrary order, so the rules have to be sorted
+        // explicitly. The sort is done once per record rather than once per field.
+        let mut ordered_rules: Vec<&Rule> = self.rules.iter().collect();
+        ordered_rules.sort_by_key(|rule| rule.priority);
+
+        let mut errors: Vec<TransformError> = Vec::new();
+        let mut transformed_fields: Vec<Option<String>> = Vec::with_capacity(expected_n_fields);
+        for (field_n, field_value) in fields.iter().enumerate() {
+            if field_n < expected_n_fields {
+                let field_name = &headers[field_n];
+                let mut transformed_field_value = Some(field_value.clone());
+                // Try each rule in order of priority; `Rule::apply` passes the value through
+                // unchanged when the rule is not applicable to this field.
+                for rule in ordered_rules.iter() {
+                    let new_value = match transformed_field_value {
+                        Some(ref fv) => match rule.apply(fv, field_name, record_n) {
+                            Ok(tfv) => tfv,
+                            Err(e) => {
+                                errors.push(e);
+                                None
+                            }
+                        },
+                        // The last transformer returned None, so we can short circuit and just
+                        // return None for the field value.
+                        None => break
+                    };
+                    transformed_field_value = new_value;
+                }
+                // Extra fields only ever follow the expected ones, so appending keeps each
+                // value at its field's index.
+                transformed_fields.push(transformed_field_value);
+            } else {
+                errors.push(
+                    TransformError {
+                        field_value: field_value.to_string(),
+                        field_name: field_n.to_string(),
+                        record_n: record_n,
+                        reason: format!("found {} header fields but record had extra field at position {}", expected_n_fields, field_n)
+                    }
+                );
+            }
+        }
+
+        TransformedRecord {
+            field_values: transformed_fields,
+            errors: errors,
+        }
+    }
+}
+
+/// Error for when a `Ruleset` does not validate against a CSV file.
+#[derive(Serialize, Deserialize, Clone, PartialEq, Eq, Hash, Debug)]
+pub struct ValidationError {
+    // Human-readable explanation; also used as the `Display` output and error description.
+    reason: String,
+}
+
+impl Display for ValidationError
+{
+    /// Write the bare `reason` text.
+    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
+        formatter.write_fmt(format_args!("{}", self.reason))
+    }
+}
+
+impl error::Error for ValidationError
+{
+    /// The error description is the `reason` text.
+    fn description(&self) -> &str {
+        self.reason.as_str()
+    }
+}
+
+/// A single processed and transformed record.
+#[derive(Serialize, Deserialize, Eq, PartialEq, Hash, Debug)]
+pub struct TransformedRecord {
+    /// Transformed fields for the record.
+    ///
+    /// Empty fields are explicitly encoded as `None` values.
+    pub field_values: Vec<Option<String>>,
+    /// Errors that were encountered during transformation, if any.
+    pub errors: Vec<TransformError>,
+}
--- a/src/transformer.rs
+++ b/src/transformer.rs
@ -0,0 +1,91 @@
+//! Traits and types that define transformations on CSV record fields.
+
+use std::result;
+use std::error;
+use std::fmt::{
+    self,
+    Formatter,
+    Display,
+};
+
+/// `Result` for the transformation of a CSV record's field: `Ok(Some(value))` for a non-empty
+/// transformed value, `Ok(None)` for an empty field, or a `TransformError` if the transformation
+/// was unsuccessful.
+pub type TransformResult = result::Result<Option<String>, TransformError>;
+
+/// Convenience constructors for the three possible `TransformResult` outcomes.
+pub trait TransformResultHelper
+{
+    /// Build the result of a successful transformation that produced the non-empty `value`.
+    fn present(value: &str) -> TransformResult {
+        let owned = String::from(value);
+        Ok(Some(owned))
+    }
+
+    /// Build the result of a successful transformation that produced an empty field.
+    fn excluded() -> TransformResult {
+        Ok(None)
+    }
+
+    /// Build the result of a failed transformation, with a descriptive error reason.
+    ///
+    /// An error reason should be a short, single sentence without punctuation or capitalization,
+    /// e.g. "not a valid email address" instead of "The email address was invalid.".
+    ///
+    /// ```
+    /// use csv_sanity::transformer::{
+    ///     TransformResult,
+    ///     TransformError,
+    ///     TransformResultHelper,
+    /// };
+    ///
+    /// let result = TransformResult::error("jak,.@hot mail.com", "Email", 0, "not a valid email address");
+    /// assert_eq!(result, Err(TransformError {
+    ///     field_value: "jak,.@hot mail.com".to_string(),
+    ///     field_name: "Email".to_string(),
+    ///     record_n: 0,
+    ///     reason: "not a valid email address".to_string(),
+    /// }));
+    /// ```
+    fn error(field_value: &str, field_name: &str, record_n: usize, reason: &str) -> TransformResult {
+        let failure = TransformError {
+            record_n: record_n,
+            field_name: String::from(field_name),
+            field_value: String::from(field_value),
+            reason: String::from(reason),
+        };
+        Err(failure)
+    }
+}
+
+// Attach the helper constructors to the alias so they can be called as `TransformResult::present(..)` etc.
+impl TransformResultHelper for TransformResult {}
+
+/// A transformation (or validation) that can be applied to a single field of a CSV record.
+pub trait Transformer
+{
+    /// Transform `field_value`, the value of the field named `field_name` in record number
+    /// `record_n`.
+    ///
+    /// Returns `Ok(Some(..))` with the transformed value, `Ok(None)` for an empty field, or a
+    /// `TransformError` when the value could not be transformed.
+    fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult;
+}
+
+/// Describes why a single field of a CSV record could not be transformed.
+#[derive(RustcEncodable, Deserialize, Serialize, Clone, PartialEq, Eq, Hash, Debug)]
+pub struct TransformError
+{
+    /// Number of the record containing the offending field.
+    pub record_n: usize,
+    /// Name of the offending field.
+    pub field_name: String,
+    /// Value of the field that failed to transform.
+    pub field_value: String,
+    /// Short explanation of the failure.
+    pub reason: String,
+}
+
+impl Display for TransformError
+{
+    /// Write the `reason` prefixed with `failed to transform field: `.
+    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
+        formatter.write_fmt(format_args!("failed to transform field: {}", self.reason))
+    }
+}
+
+impl error::Error for TransformError
+{
+    /// The error description is the bare `reason` text (without the `Display` prefix).
+    fn description(&self) -> &str {
+        self.reason.as_str()
+    }
+}
--- a/src/transformers/capitalize.rs
+++ b/src/transformers/capitalize.rs
@ -0,0 +1,41 @@
+use Transformer;
+use transformer::{
+    TransformResultHelper,
+    TransformResult
+};
+
+use unicode_segmentation::UnicodeSegmentation;
+
+/// Capitalize every Unicode word in `string` and join the words with single spaces.
+///
+/// Anything `unicode_words` does not report as part of a word is dropped from the result.
+pub fn capitalize(string: &str) -> String
+{
+    let words: Vec<String> = string.unicode_words()
+        .map(|word| capitalize_word(word))
+        .collect();
+    words.join(" ")
+}
+
+/// Uppercase the first character of `word` and lowercase all the others.
+fn capitalize_word(word: &str) -> String
+{
+    let mut letters = word.chars();
+    let mut capitalized = String::new();
+    if let Some(first) = letters.next() {
+        capitalized.extend(first.to_uppercase());
+    }
+    for letter in letters {
+        capitalized.extend(letter.to_lowercase());
+    }
+    capitalized
+}
+
+/// Transformer that capitalizes each word of a field value.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
+pub struct CapitalizeTransformer {}
+
+impl CapitalizeTransformer
+{
+    /// Create a new `CapitalizeTransformer`; it has no configuration.
+    pub fn new() -> Self {
+        CapitalizeTransformer {}
+    }
+}
+
+impl Transformer for CapitalizeTransformer
+{
+    /// Always succeeds, returning `field_value` passed through `capitalize`.
+    fn transform(&self, field_value: &str, _field_name: &str, _record_n: usize) -> TransformResult {
+        TransformResult::present(&capitalize(field_value))
+    }
+}
--- a/src/transformers/choice.rs
+++ b/src/transformers/choice.rs
@ -0,0 +1,37 @@
+use Transformer;
+use transformer::{
+    TransformResultHelper,
+    TransformResult
+};
+
+/// Transformer that accepts a field value only if it is one of a fixed list of choices.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
+pub struct ChoiceTransformer {
+    choices: Vec<String>,
+}
+
+impl ChoiceTransformer
+{
+    /// Create a transformer that accepts exactly the values in `choices`.
+    pub fn new(choices: Vec<String>) -> Self {
+        ChoiceTransformer { choices: choices }
+    }
+}
+
+impl Transformer for ChoiceTransformer
+{
+    /// Return `field_value` unchanged when it exactly equals one of the choices; otherwise
+    /// return an error listing the valid choices.
+    fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult {
+        let is_valid_choice = self.choices.iter().any(|choice| choice.as_str() == field_value);
+        if !is_valid_choice {
+            let reason = format!("not in valid choices {:?}", self.choices);
+            return TransformResult::error(field_value, field_name, record_n, &reason);
+        }
+        TransformResult::present(field_value)
+    }
+}
--- a/src/transformers/date.rs
+++ b/src/transformers/date.rs
@ -0,0 +1,41 @@
+use Transformer;
+use transformer::{
+    TransformResultHelper,
+    TransformResult
+};
+
+use time::{
+    strptime
+};
+
+/// Transformer that parses a date using one of several input formats and re-formats it with a
+/// single output format.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
+pub struct DateTransformer {
+    input_formats: Vec<String>,
+    output_format: String
+}
+
+impl DateTransformer {
+    /// Create a transformer that tries each of `input_formats` in order and writes the parsed
+    /// date using `output_format`.
+    pub fn new(input_formats: Vec<String>, output_format: &str) -> Self {
+        let output_format = String::from(output_format);
+        DateTransformer {
+            input_formats: input_formats,
+            output_format: output_format
+        }
+    }
+
+    /// Create a transformer whose output format is `%F`.
+    pub fn with_iso8601_output(input_formats: Vec<String>) -> Self {
+        DateTransformer::new(input_formats, "%F")
+    }
+}
+
+impl Transformer for DateTransformer {
+    /// Parse `field_value` with the first input format that accepts it and return the date
+    /// rendered in the output format.
+    ///
+    /// Returns an error when none of the input formats match, or when the configured output
+    /// format is rejected by `strftime`. The output format comes from user-supplied
+    /// configuration, so a bad one is reported as a field error instead of panicking.
+    fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult {
+        for format in self.input_formats.iter() {
+            if let Ok(time) = strptime(field_value, format) {
+                let formatted = match time.strftime(&self.output_format) {
+                    Ok(rendered) => rendered.to_string(),
+                    Err(e) => {
+                        let reason = format!("invalid output date format {:?}: {}", self.output_format, e);
+                        return TransformResult::error(field_value, field_name, record_n, &reason);
+                    }
+                };
+                return TransformResult::present(&formatted);
+            }
+        }
+        TransformResult::error(field_value, field_name, record_n, "unable to parse as date")
+    }
+}
--- a/src/transformers/email.rs
+++ b/src/transformers/email.rs
@ -0,0 +1,30 @@
+use Transformer;
+use transformer::{
+    TransformResultHelper,
+    TransformResult
+};
+
+use regex::Regex;
+
+// Case-insensitive pattern anchored to the whole input (`\A` ... `\z`): one or more
+// dot-separated local-part atoms, an `@`, then two or more dot-separated domain labels that
+// start and end with an alphanumeric character.
+lazy_static! {
+    static ref EMAIL_REGEX: Regex = Regex::new(r"(?i)\A[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\z").unwrap();
+}
+
+/// Transformer that validates email addresses and lowercases them.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
+pub struct EmailTransformer {}
+
+impl EmailTransformer {
+    /// Create a new `EmailTransformer`; it has no configuration.
+    pub fn new() -> Self {
+        EmailTransformer {}
+    }
+}
+
+impl Transformer for EmailTransformer {
+    /// Return the lowercased `field_value` when it matches `EMAIL_REGEX`, otherwise an error.
+    fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult {
+        if !EMAIL_REGEX.is_match(field_value) {
+            return TransformResult::error(field_value, field_name, record_n, "invalid email address");
+        }
+        let lowercased = field_value.to_lowercase();
+        TransformResult::present(&lowercased)
+    }
+}
--- a/src/transformers/mod.rs
+++ b/src/transformers/mod.rs
@ -0,0 +1,76 @@
+use transformer::{
+    Transformer,
+    TransformResult,
+};
+
+mod trim;
+pub use self::trim::TrimTransformer;
+
+mod none;
+pub use self::none::NoneTransformer;
+
+mod regex;
+pub use self::regex::{
+    RegexTransformer,
+    RegexMatchTransformer
+};
+
+mod capitalize;
+pub use self::capitalize::{
+    CapitalizeTransformer,
+    capitalize
+};
+
+mod email;
+pub use self::email::EmailTransformer;
+
+mod number;
+pub use self::number::NumberTransformer;
+
+mod date;
+pub use self::date::DateTransformer;
+
+mod choice;
+pub use self::choice::ChoiceTransformer;
+
+mod zipcode;
+pub use self::zipcode::ZipcodeTransformer;
+
+mod phone_number;
+pub use self::phone_number::PhoneNumberTransformer;
+
+/// The closed set of transformers provided by this module, one variant per concrete transformer
+/// type. Its `Transformer` impl delegates to the wrapped value.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
+pub enum Transformers
+{
+    Trim(TrimTransformer),
+    None(NoneTransformer),
+    Regex(RegexTransformer),
+    RegexMatch(RegexMatchTransformer),
+    Capitalize(CapitalizeTransformer),
+    Email(EmailTransformer),
+    Number(NumberTransformer),
+    Date(DateTransformer),
+    Choice(ChoiceTransformer),
+    Zipcode(ZipcodeTransformer),
+    PhoneNumber(PhoneNumberTransformer),
+}
+
+impl Transformer for Transformers {
+    /// Forward the call to whichever concrete transformer this value wraps.
+    fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult {
+        // Pick the wrapped transformer as a trait object, then make a single delegating call.
+        let delegate: &Transformer = match *self {
+            Transformers::Trim(ref t) => t,
+            Transformers::None(ref t) => t,
+            Transformers::Regex(ref t) => t,
+            Transformers::RegexMatch(ref t) => t,
+            Transformers::Capitalize(ref t) => t,
+            Transformers::Email(ref t) => t,
+            Transformers::Number(ref t) => t,
+            Transformers::Date(ref t) => t,
+            Transformers::Choice(ref t) => t,
+            Transformers::Zipcode(ref t) => t,
+            Transformers::PhoneNumber(ref t) => t,
+        };
+        delegate.transform(field_value, field_name, record_n)
+    }
+}
--- a/src/transformers/none.rs
+++ b/src/transformers/none.rs
@ -0,0 +1,34 @@
+use Transformer;
+use transformer::{
+    TransformResultHelper,
+    TransformResult
+};
+use newtypes::Regex;
+
+use regex;
+
+
+/// Transformer that turns field values matching a regex into empty (`None`) fields.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
+pub struct NoneTransformer {
+    regex: Regex
+}
+
+impl NoneTransformer {
+    /// Create a transformer that excludes every field value matched by `regex`.
+    pub fn new(regex: regex::Regex) -> NoneTransformer {
+        NoneTransformer { regex: Regex::from(regex) }
+    }
+
+    /// Create a transformer that excludes blank values: values consisting solely of ASCII
+    /// control characters and/or whitespace, including the empty string.
+    pub fn with_blank_matcher() -> NoneTransformer {
+        // The POSIX class must sit inside a bracket expression (`[[:cntrl:]]`). A bare
+        // `[:cntrl:]` is an ordinary character class of the literal characters `:`, `c`, `n`,
+        // `t`, `r` and `l`, which would wrongly treat values such as "c" or "tn" as blank.
+        Self::new(regex::Regex::new(r"\A(?:[[:cntrl:]]|\s)*\z").unwrap())
+    }
+}
+
+impl Transformer for NoneTransformer {
+    /// Return an excluded (empty) result when the regex matches `field_value`; otherwise pass
+    /// the value through unchanged.
+    fn transform(&self, field_value: &str, _field_name: &str, _record_n: usize) -> TransformResult {
+        let should_exclude = self.regex.is_match(field_value);
+        if should_exclude {
+            return TransformResult::excluded();
+        }
+        TransformResult::present(field_value)
+    }
+}
--- a/src/transformers/number.rs
+++ b/src/transformers/number.rs
@ -0,0 +1,30 @@
+use Transformer;
+use transformer::{
+    TransformResultHelper,
+    TransformResult
+};
+
+use regex::Regex;
+
+// Matches a whole-string, unsigned decimal integer without leading zeros: either `0` or a
+// non-zero digit followed by any number of digits. The group is non-capturing (`(?:`); the
+// previous `(:?` spelling was a capturing group with an optional leading colon, so ":0" was
+// accepted as a number.
+lazy_static! {
+    static ref INTEGER_REGEX: Regex = Regex::new(r"\A(?:0|[1-9]\d*)\z").unwrap();
+}
+
+/// Transformer that validates that a field value is an integer accepted by `INTEGER_REGEX`.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
+pub struct NumberTransformer { }
+
+impl NumberTransformer {
+    /// Create a transformer that accepts integers only.
+    pub fn match_integer() -> Self {
+        NumberTransformer { }
+    }
+}
+
+impl Transformer for NumberTransformer {
+    /// Pass `field_value` through unchanged when it matches `INTEGER_REGEX`, otherwise return
+    /// an error.
+    fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult {
+        if !INTEGER_REGEX.is_match(field_value) {
+            return TransformResult::error(field_value, field_name, record_n, "not a valid number");
+        }
+        TransformResult::present(field_value)
+    }
+}
--- a/src/transformers/phone_number.rs
+++ b/src/transformers/phone_number.rs
@ -0,0 +1,34 @@
+use Transformer;
+use transformer::{
+    TransformResultHelper,
+    TransformResult
+};
+
+use regex::Regex;
+
+// Whole-string pattern: an optional leading `1` or `+1`, then a 3-digit area code (optionally
+// parenthesised), a 3-digit exchange code and a 4-digit subscriber number, with any run of
+// non-digit characters allowed between the parts. The three parts are captured by name.
+lazy_static! {
+    static ref NANP_REGEX: Regex = Regex::new(r"\A(?:\+?1)?\D*\(?(?P<area>\d{3})\)?\D*(?P<exchange>\d{3})\D*(?P<subscriber>\d{4})\z").unwrap();
+}
+
+/// Transformer that validates phone numbers against `NANP_REGEX` and normalizes them.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
+pub struct PhoneNumberTransformer { }
+
+impl PhoneNumberTransformer {
+    /// Create a transformer that expects NANP format phone numbers.
+    pub fn expect_nanp_format() -> Self {
+        PhoneNumberTransformer { }
+    }
+}
+
+impl Transformer for PhoneNumberTransformer {
+    /// Rewrite a value matching `NANP_REGEX` as `+1 AAA EEE SSSS`; return an error for any
+    /// other value.
+    fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult {
+        let captures = match NANP_REGEX.captures(field_value) {
+            Some(captures) => captures,
+            None => {
+                return TransformResult::error(field_value, field_name, record_n, "not a valid NANP format phone number");
+            }
+        };
+        // All three named groups are mandatory in the pattern, so they exist whenever it matches.
+        let area = captures.name("area").unwrap().as_str();
+        let exchange = captures.name("exchange").unwrap().as_str();
+        let subscriber = captures.name("subscriber").unwrap().as_str();
+        TransformResult::present(&format!("+1 {} {} {}", area, exchange, subscriber))
+    }
+}
--- a/src/transformers/regex.rs
+++ b/src/transformers/regex.rs
@ -0,0 +1,88 @@
+use Transformer;
+use transformer::{
+    TransformResultHelper,
+    TransformResult
+};
+use newtypes::Regex;
+
+use regex;
+
+/// Transformer that matches a field value against a regex and rewrites it by expanding a
+/// template with the resulting captures.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
+pub struct RegexTransformer
+{
+    regex: Regex,
+    template: String
+}
+
+impl RegexTransformer
+{
+    /// Create a transformer that matches values against `regex` and expands `template` with
+    /// the captures.
+    pub fn new(regex: regex::Regex, template: &str) -> Self {
+        let wrapped = Regex::from(regex);
+        RegexTransformer {
+            regex: wrapped,
+            template: String::from(template)
+        }
+    }
+}
+
+impl Transformer for RegexTransformer
+{
+    /// Return the template expanded with the captures of the regex match, or an error when
+    /// `field_value` does not match the regex.
+    fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult {
+        match self.regex.captures(field_value) {
+            Some(captures) => {
+                let mut expanded = String::new();
+                captures.expand(&self.template, &mut expanded);
+                TransformResult::present(&expanded)
+            },
+            None => {
+                let reason = format!("did not match pattern {}", self.regex);
+                TransformResult::error(field_value, field_name, record_n, &reason)
+            }
+        }
+    }
+}
+
+/// Transformer that validates a field value by requiring it to match, or to not match, a
+/// regex. The value itself is never modified.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
+pub struct RegexMatchTransformer
+{
+    regex: Regex,
+    negate: bool
+}
+
+impl RegexMatchTransformer
+{
+    /// Create a transformer that accepts only values matching `regex`.
+    pub fn matching(regex: regex::Regex) -> Self {
+        let wrapped = Regex::from(regex);
+        RegexMatchTransformer { regex: wrapped, negate: false }
+    }
+
+    /// Create a transformer that accepts only values that do not match `regex`.
+    pub fn not_matching(regex: regex::Regex) -> Self {
+        let wrapped = Regex::from(regex);
+        RegexMatchTransformer { regex: wrapped, negate: true }
+    }
+}
+
+impl Transformer for RegexMatchTransformer
+{
+    /// Pass `field_value` through unchanged when it satisfies the (possibly negated) pattern;
+    /// otherwise return an error naming the pattern.
+    fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult {
+        // Accept when the match outcome differs from `negate`: a match for a normal pattern,
+        // or a non-match for an exclusionary one.
+        let matched = self.regex.is_match(field_value);
+        if matched != self.negate {
+            return TransformResult::present(field_value);
+        }
+
+        let reason = match self.negate {
+            true => format!("matched exclusionary pattern {}", self.regex),
+            false => format!("did not match pattern {}", self.regex),
+        };
+        TransformResult::error(field_value, field_name, record_n, &reason)
+    }
+}
--- a/src/transformers/trim.rs
+++ b/src/transformers/trim.rs
@ -0,0 +1,20 @@
+use Transformer;
+use transformer::{
+    TransformResultHelper,
+    TransformResult
+};
+
+/// Transformer that strips leading and trailing whitespace from a field value.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
+pub struct TrimTransformer {}
+
+impl TrimTransformer {
+    /// Create a new `TrimTransformer`; it has no configuration.
+    pub fn new() -> Self {
+        TrimTransformer {}
+    }
+}
+
+impl Transformer for TrimTransformer {
+    /// Always succeeds, returning `field_value` with surrounding whitespace removed.
+    fn transform(&self, field_value: &str, _field_name: &str, _record_n: usize) -> TransformResult {
+        let trimmed = field_value.trim();
+        TransformResult::present(trimmed)
+    }
+}
--- a/src/transformers/zipcode.rs
+++ b/src/transformers/zipcode.rs
@ -0,0 +1,37 @@
+use Transformer;
+use transformer::{
+    TransformResultHelper,
+    TransformResult
+};
+
+use regex::Regex;
+
+// Whole-string pattern: five digits (group 1), then any run of non-digit characters, then an
+// optional four-digit extension (group 2).
+lazy_static! {
+    static ref ZIP_REGEX: Regex = Regex::new(r"\A(\d{5})\D*(?:(\d{4}))?\z").unwrap();
+}
+
+/// Transformer that validates zipcodes against `ZIP_REGEX` and normalizes them.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Hash, Clone, Debug)]
+pub struct ZipcodeTransformer { }
+
+impl ZipcodeTransformer {
+    /// Create a new `ZipcodeTransformer`; it has no configuration.
+    pub fn new() -> Self {
+        ZipcodeTransformer { }
+    }
+}
+
+impl Transformer for ZipcodeTransformer {
+    /// Rewrite a value matching `ZIP_REGEX` as `NNNNN`, or `NNNNN-NNNN` when the four-digit
+    /// extension is present; return an error for any other value.
+    fn transform(&self, field_value: &str, field_name: &str, record_n: usize) -> TransformResult {
+        let captures = match ZIP_REGEX.captures(field_value) {
+            Some(captures) => captures,
+            None => {
+                return TransformResult::error(field_value, field_name, record_n, "not a valid zipcode");
+            }
+        };
+        // Group 1 is mandatory in the pattern, so it exists whenever the pattern matches.
+        let base_code = captures.get(1).unwrap().as_str();
+        let zipcode = match captures.get(2) {
+            Some(plus_four) => format!("{}-{}", base_code, plus_four.as_str()),
+            None => base_code.to_string(),
+        };
+        TransformResult::present(&zipcode)
+    }
+}