From 93d851f5232b29c6737ed1e472b2e3617669d674 Mon Sep 17 00:00:00 2001
From: dsc
Date: Mon, 16 Apr 2012 13:02:30 -0700
Subject: [PATCH] CSV parser, supporting transforms.

---
 lib/util/csv.co   |  254 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/util/index.co |    3 +-
 www/modules.yaml  |    2 +
 3 files changed, 258 insertions(+), 1 deletions(-)
 create mode 100644 lib/util/csv.co

diff --git a/lib/util/csv.co b/lib/util/csv.co
new file mode 100644
index 0000000..cdab5ae
--- /dev/null
+++ b/lib/util/csv.co
@@ -0,0 +1,254 @@
+_ = require 'kraken/util/underscore'
+op = require 'kraken/util/op'
+
+
+DASH_PATTERN = /-/g
+BLANK_LINE_PATTERN = /^(\s*)$/
+COMMENT_PATTERN = /\s*(#|\/\/).*$/
+
+/**
+ * Parses delimited text whose first column is a date into row-oriented and
+ * column-oriented matrices, and keeps a stack of transforms per column that
+ * can be applied to, or cleared from, the parsed values.
+ */
+class CSVData
+    DEFAULT_OPTIONS :
+        colSep : ','
+        rowSep : '\n'
+        defaultType : 'float'
+        customBars : false
+        customSep : ';'
+        errorBars : false
+        fractions : false
+        fractionSep : '/'
+        skipBlankLines : true
+        blankLinePat : BLANK_LINE_PATTERN
+        removeCommentedText : true
+        commentPat : COMMENT_PATTERN
+        replaceMissing : false
+        replaceMissingValue : 0
+        replaceNaN : false
+        replaceNaNValue : 0
+        padRows : false
+        padRowsValue : 0
+
+    options : {}
+
+    labels : []
+    types : []
+
+    rawData : null # String
+    origData : null # row-oriented (untransformed)
+    data : null # row-oriented
+    columns : null # column-oriented (includes date column)
+    dateColumn : null # only date column
+    dataColumns : null # column-oriented (excludes date column)
+
+    transforms : null # Array<Array<Function>>: column index -> stack of transforms
+
+
+    # Constructor. Accepts (data, opts) or just (opts); when the first argument is
+    # neither a String nor an Array it is treated as the options object.
+    # Parses immediately if data (or options.data) is given.
+    # NOTE(review): parse() calls .split on its input, so Array data is accepted
+    # here but will fail there -- confirm the intended Array format.
+    (data, opts) ->
+        unless typeof data is 'string' or _.isArray data
+            [opts, data] = [data, null]
+        @options = _.clone(@DEFAULT_OPTIONS) import (opts or {})
+        # for k in @DEFAULT_OPTIONS then this[k] ?= @options[k]
+        @transforms = []
+        @labels = @options.labels or []
+        @types = @options.types or []
+        @parse that if data or @options.data
+
+
+    /* * * * Parsing * * * */
+
+    /** Parses one field as a float. */
+    parseNumber: (s) ->
+        parseFloat s
+
+    /**
+     * Parses a custom-bars field: several numbers joined by options.customSep.
+     * @returns {Array<Number>}
+     */
+    parseHiLo: (s) ->
+        s.split @options.customSep .map @parseNumber, this
+
+    /**
+     * Parses a fraction field: numbers joined by options.fractionSep.
+     * @returns {Array<Number>}
+     */
+    parseFraction: (s) ->
+        s.split @options.fractionSep .map @parseNumber, this
+
+    /** Parses a date field, first rewriting dashes as slashes. */
+    parseDate: (s) ->
+        new Date s.replace DASH_PATTERN, '/'
+
+
+    /**
+     * Parses raw text into @data (rows) and @columns, snapshots the rows into
+     * @origData, and ensures every column has a transform stack.
+     *
+     * The first usable line supplies the labels unless labels were given as an
+     * option. In every other line the first field is parsed as a date and the
+     * rest with the number, custom-bars or fraction parser; lines with fewer
+     * than two fields are skipped.
+     *
+     * @param {String} rawData Text to parse; also stored on @rawData.
+     * @returns {this}
+     */
+    parse: (@rawData) ->
+        o = @options
+
+        lines = rawData.split o.rowSep
+        return this unless lines.length
+        first = lines[0]
+
+        # Use the default delimiter or fall back to a tab if that makes sense.
+        delim = o.colSep
+        if first.indexOf(delim) is -1 and first.indexOf('\t') >= 0
+            delim = '\t'
+
+        data = @data = []
+        @columns = []
+        @dataColumns = []
+
+        parser = @parseNumber
+        parser = @parseHiLo if o.customBars
+        parser = @parseFraction if o.fractions
+
+        hasHeaders = @labels.length is not 0
+        for line, i of lines
+            line .= replace o.commentPat, '' if o.removeCommentedText
+            continue if o.skipBlankLines and (line.length is 0 or o.blankLinePat.test line)
+
+            cols = line.split delim
+            unless hasHeaders
+                hasHeaders = true
+                @labels = cols.map -> _.strip it
+                continue
+
+            continue unless cols.length > 1
+            date = @parseDate cols.shift()
+            fields = cols.map parser, this
+            if o.errorBars
+                # Group consecutive values into pairs.
+                fields = fields.reduce do
+                    (acc, v) ->
+                        last = acc[acc.length-1]
+                        unless last and last.length < 2
+                            acc.push last = []
+                        last.push v
+                        acc
+                    []
+
+            fields.unshift date
+            data.push fields
+            fields.forEach (v, idx) ~>
+                @columns.push [] unless @columns[idx]
+                @columns[idx].push v
+
+        @origData = _.merge [], @data
+        while @transforms.length < @columns.length
+            @transforms.push []
+        @dateColumn = @columns[0]
+        @dataColumns = @columns.slice(1)
+        this
+
+
+
+    /* * * * Data Transformation * * * */
+
+    /**
+     * Rebuilds the row-oriented data matrix from the columns.
+     */
+    rebuildData: ->
+        @data = _.zip ...@columns
+        @dateColumn = @columns[0]
+        @dataColumns = @columns.slice(1)
+        this
+
+    /**
+     * Rebuilds the column-oriented data matrix from the rows.
+     */
+    rebuildColumns: ->
+        @columns = _.zip ...@data
+        @dateColumn = @columns[0]
+        @dataColumns = @columns.slice(1)
+        this
+
+    /**
+     * Map a function across the specified columns, one-by-one (in column-major
+     * order), replacing the data with the mapped result.
+     *
+     * May also be called as addTransform(fn, [ctx]) to map every column.
+     *
+     * @param {Number|Array} indices List one or more column indices to map. Negative
+     *  numbers are offset from the end of the columns list.
+     * @param {Function} fn Mapping function of the form:
+     *  `(single_value, row_idx, column) -> new_value`
+     * @param {Object} [ctx=this] Execution context.
+     * @returns {this}
+     */
+    addTransform: (indices, fn, ctx=this) ->
+        num_cols = (@columns or []).length
+        if typeof indices is 'function'
+            [ctx, fn, indices] = [fn or this, indices, null]
+        # Without columns there is nothing to transform (and no index to wrap).
+        return this unless num_cols
+        unless indices?
+            indices = _.range num_cols
+        unless _.isArray indices
+            indices = [indices]
+        # applyTransforms() only supplies this instance as context, so a custom
+        # context has to be bound to the function before it is stored.
+        unless ctx is this
+            fn .= bind ctx
+        for idx of indices
+            idx %= num_cols
+            idx += num_cols if idx < 0
+            @transforms[idx] ?= []
+            @transforms[idx].push fn
+        @applyTransforms()
+
+    /**
+     * Adds a transform to every column except the date column (index 0).
+     * @returns {this}
+     */
+    addDataTransform: (fn, ctx=this) ->
+        @addTransform _.range(1, @columns.length), fn, ctx
+
+    /**
+     * Applies every column's transform stack, in the order added, starting
+     * from a copy of the untransformed rows so that repeated calls do not
+     * compound earlier results. Transforms run with this instance as context
+     * unless they were bound to another one.
+     * @returns {this}
+     */
+    applyTransforms: ->
+        @data = _.merge [], @origData
+        @rebuildColumns()
+        for fns, idx of @transforms
+            continue unless @columns[idx]
+            for fn of fns
+                @columns[idx] .= map fn, this
+        @rebuildData()
+
+    /**
+     * Drops all transforms and restores the data from the untransformed rows,
+     * leaving an empty transform stack for each column.
+     * @returns {this}
+     */
+    clearTransforms: ->
+        @data = _.merge [], @origData
+        @rebuildColumns()
+        @transforms = @columns.map -> []
+        this
+
+
+module.exports = CSVData
+
+
diff --git a/lib/util/index.co b/lib/util/index.co
index 35cb297..e98d502 100644
--- a/lib/util/index.co
+++ b/lib/util/index.co
@@ -12,7 +12,8 @@ op = require 'kraken/util/op'
 backbone = require 'kraken/util/backbone'
 parser = require 'kraken/util/parser'
 Cascade = require 'kraken/util/cascade'
-exports import { root, _, op, backbone, parser, Cascade, }
+CSVData = require 'kraken/util/csv'
+exports import { root, _, op, backbone, parser, Cascade, CSVData, }
 
 # HashSet = require 'kraken/util/hashset'
 # BitString = require 'kraken/util/bitstring'
diff --git a/www/modules.yaml b/www/modules.yaml
index 1e965c5..1f46bcb 100644
--- a/www/modules.yaml
+++ b/www/modules.yaml
@@ -52,11 +52,13 @@ dev:
                 - backbone
                 - parser
                 - cascade
+                - csv
                 - index
             - base:
                 - base-mixin
                 - base-model
                 - base-view
+                - model-cache
                 - cascading-model
                 - index
             - scaffold:
-- 
1.7.0.4