I've been trying to write a C# version of my Java CSV parser using C#-specific idioms.
Here is the full code:
using System;
using System.Collections;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;

namespace CSV
{
    /// <summary>
    /// Raised when a field of the CSV data cannot be converted to the type of the property it maps to.
    /// The exception thrown by the underlying converter is available through
    /// <see cref="Exception.InnerException"/>.
    /// </summary>
    public sealed class ParseException : Exception
    {
        /// <summary>Initializes a new instance of <see cref="ParseException"/> with a default message.</summary>
        public ParseException()
        {
        }

        /// <summary>Initializes a new instance of <see cref="ParseException"/> with the specified message.</summary>
        /// <param name="message">A description of the failure.</param>
        public ParseException(string message) : base(message)
        {
        }

        /// <summary>
        /// Initializes a new instance of <see cref="ParseException"/> with the specified message and cause.
        /// </summary>
        /// <param name="message">A description of the failure.</param>
        /// <param name="inner">The exception thrown by the converter.</param>
        public ParseException(string message, Exception inner) : base(message, inner)
        {
        }
    }

    /// <summary>
    /// This Exception is raised when a type <c>T</c> is not supported by
    /// <see cref="Convert.ChangeType(object,Type,IFormatProvider)"/> nor has a custom parser been registered
    /// via <see cref="Parsers.RegisterParser{T}(Converter{string,T})"/> for the type.
    /// </summary>
    public sealed class NoSuchParserException : Exception
    {
        /// <summary>Initializes a new instance of <see cref="NoSuchParserException"/> with a default message.</summary>
        public NoSuchParserException()
        {
        }

        /// <summary>Initializes a new instance of <see cref="NoSuchParserException"/> for the given type.</summary>
        /// <param name="t">The type for which no parser is available.</param>
        public NoSuchParserException(Type t) : base($"There are no supported parsers for {t}")
        {
        }

        /// <summary>
        /// Initializes a new instance of <see cref="NoSuchParserException"/> for the given type, keeping the
        /// exception that revealed the missing parser as the inner exception.
        /// </summary>
        /// <param name="t">The type for which no parser is available.</param>
        /// <param name="inner">The exception that revealed the missing parser.</param>
        public NoSuchParserException(Type t, Exception inner)
            : base($"There are no supported parsers for {t}", inner)
        {
        }
    }

    /// <summary>
    /// This attribute may be applied to any property of a class or struct to indicate that the custom name should
    /// be matched against the headers of the CSV file instead of the name of the property.
    /// </summary>
    ///
    /// <example>
    /// <c>[CSV.PropertyName("value")] public int Num { get; set; }</c>
    /// </example>
    [AttributeUsage(AttributeTargets.Property)]
    public sealed class PropertyNameAttribute : Attribute
    {
        /// <summary>
        /// The header name the decorated property is matched against.
        /// </summary>
        public string Name { get; }

        /// <summary>
        /// Initializes a new instance of <see cref="PropertyNameAttribute"/> with the specified header name.
        /// </summary>
        /// <param name="name">The header name to match the property against.</param>
        public PropertyNameAttribute(string name) => Name = name;
    }

    /// <summary>
    /// A struct for accessing the map of parsers used by <see cref="Parser{TRow}"/>.
    /// </summary>
    /// <remarks>
    /// The map is a plain static <see cref="Dictionary{TKey,TValue}"/> with no locking of its own.
    /// </remarks>
    public readonly struct Parsers
    {
        internal static readonly Dictionary<Type, Converter<string, object>> Dict =
            new Dictionary<Type, Converter<string, object>>();

        /// <summary>
        /// Globally registers a parser for <typeparamref name="T"/>, overriding any parser which may exist
        /// for the type.
        /// </summary>
        /// <param name="parser">a <c>Converter</c> from a string to an arbitrary type <c>T</c></param>
        /// <typeparam name="T">a type to make available for parsing into</typeparam>
        /// <exception cref="ArgumentNullException"><paramref name="parser"/> is <c>null</c>.</exception>
        public static void RegisterParser<T>(Converter<string, T> parser)
        {
            if (parser == null)
            {
                throw new ArgumentNullException(nameof(parser));
            }

            // Delegate variance does not apply when T is a value type, so the typed converter is wrapped
            // in a delegate that returns object (boxing the result where necessary).
            Dict[typeof(T)] = s => parser(s);
        }
    }

    /// <summary>
    /// This class allows CSV text to be conveniently parsed into an enumerable sequence of objects of type
    /// <c>TRow</c>.
    /// </summary>
    ///
    /// <remarks>
    /// <para>
    /// By default, CSV.Parser supports parsing all types supported by
    /// <see cref="Convert.ChangeType(object,Type,IFormatProvider)"/>.
    /// Parsers for other types may be added via <see cref="Parsers.RegisterParser{T}(Converter{string,T})"/>.
    /// </para>
    /// <para>
    /// Lines are split on every occurrence of the delimiter; a quoted field that itself contains the delimiter
    /// is therefore split as well. Spaces and double quotes are trimmed from both ends of each field.
    /// </para>
    /// <para>
    /// A writable property with no matching header is assigned <c>null</c> (the default value for value types)
    /// on every row. A property whose column is missing from a short line is left untouched.
    /// </para>
    /// </remarks>
    ///
    /// <example>
    /// Suppose there exists the following struct <c>Foo</c>:
    /// <code>
    /// public struct Foo
    /// {
    ///     [CSV.PropertyName("Value")] public float X { get; set; }
    ///     public string Name { get; set; }
    /// }
    /// </code>
    /// Given a <see cref="TextReader"/> whose contents are
    /// <code>
    /// Name,Value
    /// hello,3.14
    /// world
    /// </code>
    /// each line can be parsed into a <c>Foo</c> object using
    /// <code>
    /// var csv = new CSV.Parser&lt;Foo&gt;(reader);
    /// foreach (var foo in csv) Console.WriteLine(foo);
    /// </code>
    /// </example>
    ///
    /// <typeparam name="TRow">
    /// a type that satisfies the following properties:
    /// <list type="bullet">
    /// <item>It has a no-argument constructor (satisfies the <c>new()</c> constraint)</item>
    /// <item>Any property which should be populated has a setter</item>
    /// </list>
    /// </typeparam>
    public class Parser<TRow> : IEnumerable<TRow> where TRow : new()
    {
        // Characters stripped from both ends of every field before conversion.
        private static readonly char[] FieldPadding = { ' ', '"' };

        private readonly TextReader _reader;
        private readonly string _delimiter;
        private readonly IFormatProvider _formatProvider;
        private readonly List<string> _headers;

        // Writable properties of TRow paired with the index of their column (-1 when no header matches).
        // Neither the headers nor the properties of TRow change after construction, so this is computed once
        // instead of once per row.
        private readonly (PropertyInfo Property, int Index)[] _columns;

        /// <summary>
        /// Creates a new CSV.Parser instance from the specified <c>reader</c> whose lines may be parsed into
        /// <c>TRow</c> instances. The first line of the reader is consumed immediately and used as the header.
        /// </summary>
        /// <param name="reader">a <c>TextReader</c> containing N lines of text, each line containing M data
        /// fields separated by a <c>delimiter</c></param>
        /// <param name="delimiter">the delimiter to use</param>
        /// <param name="formatProvider">the culture used by the built-in conversions; when <c>null</c>, the
        /// culture current at the time each row is read is used</param>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is <c>null</c>.</exception>
        public Parser(TextReader reader, string delimiter = ",", IFormatProvider formatProvider = null)
        {
            _reader = reader ?? throw new ArgumentNullException(nameof(reader));
            _delimiter = delimiter;
            _formatProvider = formatProvider;

            // An empty reader has no header line; treat that as "no columns" rather than keeping a null list.
            _headers = _reader.ReadLine()?.Split(delimiter).ToList() ?? new List<string>();
            _columns = MapColumns(_headers);
        }

        /// <summary>
        /// Ignores the specified next number of lines. Useful for possible inclusion of metadata in the CSV data.
        /// </summary>
        /// <param name="numberOfLines">the number of lines to skip</param>
        /// <returns>this CSV.Parser instance</returns>
        public Parser<TRow> Skip(int numberOfLines)
        {
            for (var i = 0; i < numberOfLines; i++)
            {
                _reader.ReadLine();
            }

            return this;
        }

        /// <summary>
        /// Parses the next line of the associated <see cref="TextReader"/> into a <c>TRow</c> object.
        /// </summary>
        /// <remarks>
        /// When <typeparamref name="TRow"/> is a value type, the end of the input cannot be told apart from a
        /// row whose properties all hold default values; use <see cref="TryReadLine"/> in that case.
        /// </remarks>
        /// <returns>The parsed TRow object, or <c>default(TRow)</c> when the reader is exhausted</returns>
        /// <exception cref="ParseException">A field could not be converted to the type of its property, or a
        /// registered parser threw an Exception while parsing</exception>
        /// <exception cref="NoSuchParserException">There is no parser for the type of one of the mapped
        /// properties of <typeparamref name="TRow"/></exception>
        public TRow ReadLine()
        {
            TryReadLine(out var row);
            return row;
        }

        /// <summary>
        /// Parses the next line of the associated <see cref="TextReader"/> into a <c>TRow</c> object and
        /// reports whether a line was available.
        /// </summary>
        /// <param name="row">The parsed row, or <c>default(TRow)</c> when the reader is exhausted</param>
        /// <returns><c>true</c> if a line was read; <c>false</c> at the end of the input</returns>
        /// <exception cref="ParseException">A field could not be converted to the type of its property, or a
        /// registered parser threw an Exception while parsing</exception>
        /// <exception cref="NoSuchParserException">There is no parser for the type of one of the mapped
        /// properties of <typeparamref name="TRow"/></exception>
        public bool TryReadLine(out TRow row)
        {
            var line = _reader.ReadLine();
            if (line == null)
            {
                row = default;
                return false;
            }

            var fields = line.Split(_delimiter);
            var provider = _formatProvider ?? CultureInfo.CurrentCulture;

            // Box once so that reflection mutates a single instance even when TRow is a struct.
            object boxed = new TRow();
            foreach (var (property, index) in _columns)
            {
                // The line has fewer fields than the header: leave the property as the constructor set it.
                if (index >= fields.Length)
                {
                    continue;
                }

                var value = index < 0
                    ? null
                    : ParseField(fields[index].Trim(FieldPadding), property.PropertyType, provider);
                property.SetValue(boxed, value);
            }

            row = (TRow) boxed;
            return true;
        }

        /// <summary>
        /// Returns an <see cref="IEnumerator{T}"/> that reads the remaining lines of the reader one at a time.
        /// The enumerator consumes the reader, so the rows can be enumerated only once.
        /// </summary>
        /// <returns>an <see cref="IEnumerator{T}"/> of all the parsed rows</returns>
        public IEnumerator<TRow> GetEnumerator()
        {
            // End of input is detected from the reader, not by comparing the row with default(TRow):
            // that comparison dereferenced null for reference types and stopped early on all-default structs.
            while (TryReadLine(out var row))
            {
                yield return row;
            }
        }

        IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

        /// <summary>
        /// Pairs every writable, non-indexed public property of <typeparamref name="TRow"/> with the index of
        /// the header it is matched against.
        /// </summary>
        private static (PropertyInfo Property, int Index)[] MapColumns(List<string> headers)
        {
            return typeof(TRow).GetProperties()
                // Indexers are writable properties too, but cannot be assigned without index arguments.
                .Where(p => p.CanWrite && p.GetIndexParameters().Length == 0)
                .Select(p =>
                {
                    var attr = p.GetCustomAttribute<PropertyNameAttribute>();
                    var name = attr == null ? p.Name : attr.Name;
                    return (p, headers.IndexOf(name));
                })
                .ToArray();
        }

        /// <summary>
        /// Converts a single field to <paramref name="t"/>, preferring a parser registered through
        /// <see cref="Parsers"/> over the built-in conversion.
        /// </summary>
        private static object ParseField(string s, Type t, IFormatProvider provider)
        {
            if (Parsers.Dict.TryGetValue(t, out var custom))
            {
                try
                {
                    return custom(s);
                }
                catch (Exception e)
                {
                    throw new ParseException($"The parser for {t} failed", e);
                }
            }

            // An empty field means "no value" for the built-in conversions.
            if (s.Length == 0)
            {
                return null;
            }

            try
            {
                return Convert.ChangeType(s, t, provider);
            }
            catch (InvalidCastException e)
            {
                // Convert.ChangeType reports an unsupported target type with InvalidCastException.
                throw new NoSuchParserException(t, e);
            }
            catch (Exception e) when (e is FormatException || e is OverflowException)
            {
                // The type is supported but this particular value is malformed or out of range.
                throw new ParseException($"Could not convert \"{s}\" to {t}", e);
            }
        }
    }
}
My primary concern is implementing exception handling idiomatically. In particular, I was wondering whether:
- `NoSuchParserException` should be removed, with `ParseException` used as a catch-all exception for the class;
- my implementation of `TryParse` could be improved or designed better.
I was also wondering how I should handle the case where the number of properties in `TRow` is not equal to the number of headers in the CSV data. I'm not sure whether I should ignore the extraneous headers or properties, add an enum option to control the behavior, or always throw an exception.