From d93d2e33ec678640343af08b9182e4547db11377 Mon Sep 17 00:00:00 2001
From: dsc
Date: Wed, 18 Apr 2012 14:27:43 -0700
Subject: [PATCH] Adds base TimeSeriesData class.

---
 lib/util/csv.co                   |  196 -------------------------------------
 lib/util/timeseries/csv.co        |  129 +++++++++++++++++++++
 lib/util/timeseries/timeseries.co |  167 ++++++++++++++++++++++++
 3 files changed, 296 insertions(+), 196 deletions(-)
 delete mode 100644 lib/util/csv.co
 create mode 100644 lib/util/timeseries/csv.co
 create mode 100644 lib/util/timeseries/index.co
 create mode 100644 lib/util/timeseries/timeseries.co

diff --git a/lib/util/csv.co b/lib/util/csv.co
deleted file mode 100644
index 58fa779..0000000
--- a/lib/util/csv.co
+++ /dev/null
@@ -1,196 +0,0 @@
-_ = require 'kraken/util/underscore'
-op = require 'kraken/util/op'
-
-
-DASH_PATTERN = /-/g
-BLANK_LINE_PATTERN = /^(\s*)$/
-COMMENT_PATTERN = /\s*(#|\/\/).*$/
-
-class CSVData
-    DEFAULT_OPTIONS :
-        colSep : ','
-        rowSep : '\n'
-        defaultType : 'float'
-        customBars : false
-        customSep : ';'
-        errorBars : false
-        fractions : false
-        fractionSep : '/'
-        skipBlankLines : true
-        blankLinePat : BLANK_LINE_PATTERN
-        removeCommentedText : true
-        commentPat : COMMENT_PATTERN
-        replaceMissing : false
-        replaceMissingValue : 0
-        replaceNaN : false
-        replaceNaNValue : 0
-        padRows : false
-        padRowsValue : 0
-
-    options : {}
-
-    labels : []
-    types : []
-
-    rawData : null # String
-    origData : null # row-oriented (untransformed)
-    data : null # row-oriented
-    columns : null # column-oriented (includes date column)
-    dateColumn : null # only date column
-    dataColumns : null # column-oriented (excludes date column)
-
-    transforms : null # Array> Row -> Col -> Stack of Transforms
-
-
-    (data, opts) ->
-        unless typeof data is 'string' or _.isArray data
-            [opts, data] = [data, null]
-        @options = _.clone(@DEFAULT_OPTIONS) import (opts or {})
-        # for k in @DEFAULT_OPTIONS then this[k] ?= @options[k]
-        @transforms = []
-        @labels = @options.labels or []
-        @types = @options.types or []
-        @parse that if data or @options.data
-
-
-    /* * * * Parsing * * * */
-
-    parseNumber: (s) ->
-        parseFloat s
-
-    parseHiLo: (s) ->
-        s.split @options.customBars .map @parseNumber, this
-
-    parseFraction: (s) ->
-        s.split @options.fractionSep .map @parseNumber, this
-
-    parseDate: (s) ->
-        new Date s.replace DASH_PATTERN, '/'
-
-
-    parse: (@rawData) ->
-        o = @options
-
-        lines = rawData.split o.rowSep
-        return [] unless lines.length
-        first = lines[0]
-
-        # Use the default delimiter or fall back to a tab if that makes sense.
-        delim = o.colSep
-        if first.indexOf(delim) is -1 and first.indexOf('\t') >= 0
-            delim = '\t'
-
-        data = @data = []
-        @columns = []
-        @dataColumns = []
-
-        parser = @parseNumber
-        parser = @parseHiLo if o.customBars
-        parser = @parseFraction if o.fractions
-
-        hasHeaders = @labels.length is not 0
-        for line, i of lines
-            line .= replace o.commentPat, '' if o.removeCommentedText
-            continue if o.skipBlankLines and (line.length is 0 or o.blankLinePat.test line)
-
-            cols = line.split delim
-            unless hasHeaders
-                hasHeaders = true
-                @labels = cols.map -> _.strip it
-                continue
-
-            continue unless cols.length > 1
-            date = @parseDate cols.shift()
-            fields = cols.map parser, this
-            if o.errorBars
-                fields = fields.reduce do
-                    (acc, v) ->
-                        last = acc[acc.length-1]
-                        unless last and last.length < 2
-                            acc.push last = []
-                        last.push v
-                        acc
-                    []
-
-            fields.unshift date
-            data.push fields
-            fields.forEach (v, idx) ~>
-                @columns.push [] unless @columns[idx]
-                @columns[idx].push v
-
-        @origData = _.merge [], @data
-        while @transforms.length < @columns.length
-            @transforms.push []
-        @dateColumn = @columns[0]
-        @dataColumns = @columns.slice(1)
-        this
-
-
-
-    /* * * * Data Transformation * * * */
-
-    /**
-     * Rebuilds the row-oriented data matrix from the columns.
-     */
-    rebuildData: ->
-        @data = _.zip ...@columns
-        @dateColumn = @columns[0]
-        @dataColumns = @columns.slice(1)
-        this
-
-    /**
-     * Rebuilds the column-oriented data matrix from the columns.
-     */
-    rebuildColumns: ->
-        @columns = _.zip ...@data
-        @dateColumn = @columns[0]
-        @dataColumns = @columns.slice(1)
-        this
-
-    /**
-     * Map a function across the specified columns, one-by-one (in column-major
-     * order), replacing the data with the mapped result.
-     *
-     * @param {Number|Array} indices List one or more column indices to map. Negative
-     * numbers are offset from the end of the columns list.
-     * @param {Function} fn Mapping function of the form:
-     * `(single_value, row_idx, column) -> new_value`
-     * @param {Object} [ctx=this] Execution context.
-     * @returns {this}
-     */
-    addTransform: (indices, fn, ctx=this) ->
-        num_cols = @columns.length
-        if typeof idx is 'function'
-            [ctx, fn, indices] = [fn, indices, null]
-        unless indices?
-            indices = _.range num_cols
-        unless _.isArray indices
-            indices = [indices]
-        for idx of indices
-            idx %= num_cols
-            idx += num_cols if idx < 0
-            @transforms[idx].push fn
-        @applyTransforms()
-
-    addDataTransform: (fn, ctx=this) ->
-        @addTransform _.range(1, @columns.length), fn, ctx
-
-    applyTransforms: ->
-        for fns, idx of @transforms
-            for fn of fns
-                @columns[idx] .= map fn, ctx
-        @rebuildData()
-
-    clearTransforms: ->
-        @transforms = []
-        @data = _.merge [], @origData
-        @rebuildColumns()
-
-
-    toJSON: ->
-        _.merge [], @data
-
-
-module.exports = CSVData
-
-
diff --git a/lib/util/timeseries/csv.co b/lib/util/timeseries/csv.co
new file mode 100644
index 0000000..f6ad4f1
--- /dev/null
+++ b/lib/util/timeseries/csv.co
@@ -0,0 +1,129 @@
+_ = require 'kraken/util/underscore'
+op = require 'kraken/util/op'
+TimeSeriesData = require 'kraken/util/timeseries/timeseries'
+
+
+DASH_PATTERN = /-/g
+BLANK_LINE_PATTERN = /^(\s*)$/
+COMMENT_PATTERN = /\s*(#|\/\/).*$/
+
+/**
+ * @class Time-series data parsed from delimited text. The first column of
+ * each row is parsed as the date.
+ */
+class CSVData extends TimeSeriesData
+    DEFAULT_OPTIONS :
+        colSep : ','
+        rowSep : '\n'
+        defaultType : 'float'
+        customBars : false
+        customSep : ';'
+        errorBars : false
+        fractions : false
+        fractionSep : '/'
+        skipBlankLines : true
+        blankLinePat : BLANK_LINE_PATTERN
+        removeCommentedText : true
+        commentPat : COMMENT_PATTERN
+        replaceMissing : false
+        replaceMissingValue : 0
+        replaceNaN : false
+        replaceNaNValue : 0
+        padRows : false
+        padRowsValue : 0
+
+
+    (data, opts) ->
+        super ...
+
+
+    /* * * * CSV Parsing * * * */
+
+    # Parses a single cell as a float.
+    parseNumber: (s) ->
+        parseFloat s
+
+    # Parses a `customSep`-delimited cell into an Array of numbers. (Splitting
+    # on `customBars` was a bug: that option is the boolean on/off switch.)
+    parseHiLo: (s) ->
+        s.split @options.customSep .map @parseNumber, this
+
+    # Parses a `fractionSep`-delimited cell into an Array of numbers.
+    parseFraction: (s) ->
+        s.split @options.fractionSep .map @parseNumber, this
+
+    # Swaps dashes for slashes before handing the string to `Date`.
+    parseDate: (s) ->
+        new Date s.replace DASH_PATTERN, '/'
+
+
+    /**
+     * Parses a CSV string into `@rows` and `@columns`.
+     *
+     * @param {String} rawData Delimited text; also stored as `@rawData`.
+     * @returns {this}
+     * @throws {TypeError} If `rawData` is not a String.
+     */
+    parse: (@rawData) ->
+        unless typeof rawData is 'string'
+            throw new TypeError "CSVData.parse() expects a String, got #{typeof rawData}"
+
+        o = @options
+        rows = @rows = []
+        @columns = []
+
+        lines = rawData.split o.rowSep
+        return this unless lines.length
+        first = lines[0]
+
+        # Use the default delimiter or fall back to a tab if that makes sense.
+        delim = o.colSep
+        if first.indexOf(delim) is -1 and first.indexOf('\t') >= 0
+            delim = '\t'
+
+        parser = @parseNumber
+        parser = @parseHiLo if o.customBars
+        parser = @parseFraction if o.fractions
+
+        hasHeaders = @labels.length is not 0
+        for line of lines
+            line .= replace o.commentPat, '' if o.removeCommentedText
+            continue if o.skipBlankLines and (line.length is 0 or o.blankLinePat.test line)
+
+            cols = line.split delim
+            unless hasHeaders
+                # Without preset labels, the first usable line is the header row.
+                hasHeaders = true
+                @labels = cols.map -> _.strip it
+                continue
+
+            continue unless cols.length > 1
+            date = @parseDate cols.shift()
+            fields = cols.map parser, this
+            if o.errorBars
+                # Regroup the flat list of values into consecutive pairs.
+                fields = fields.reduce do
+                    (acc, v) ->
+                        last = acc[acc.length-1]
+                        unless last and last.length < 2
+                            acc.push last = []
+                        last.push v
+                        acc
+                    []
+
+            fields.unshift date
+            rows.push fields
+            fields.forEach (v, idx) ~>
+                @columns.push [] unless @columns[idx]
+                @columns[idx].push v
+
+        @untransformedRows = _.merge [], @rows
+        this
+
+
+
+
+
+module.exports = exports = CSVData
+
+
diff --git a/lib/util/timeseries/index.co b/lib/util/timeseries/index.co
new file mode 100644
index 0000000..e69de29
diff --git a/lib/util/timeseries/timeseries.co b/lib/util/timeseries/timeseries.co
new file mode 100644
index 0000000..d7f46fe
--- /dev/null
+++ b/lib/util/timeseries/timeseries.co
@@ -0,0 +1,167 @@
+_ = require 'kraken/util/underscore'
+op = require 'kraken/util/op'
+
+
+
+/**
+ * @class Represents a collection of data columns aligned along a common timeline.
+ */
+class TimeSeriesData
+    DEFAULT_OPTIONS : {}
+
+    options : {}
+    labels : []
+    types : []
+
+    untransformedRows : null # row-oriented (untransformed)
+    rows : null # row-oriented
+    columns : null # column-oriented (includes date column)
+    dateColumn : null # only date column
+    dataColumns : null # column-oriented (excludes date column)
+    transforms : null # one stack (Array) of transform functions per column
+
+
+    /**
+     * @constructor
+     * @param {String|Array} [data] Raw data handed to `parse()`; when absent,
+     *  `opts.data` is used instead.
+     * @param {Object} [opts] Options merged over `DEFAULT_OPTIONS`.
+     */
+    (data, opts) ->
+        # A non-String, non-Array first argument is the options object. Keep an
+        # explicit `opts` when `data` is merely null/undefined.
+        unless typeof data is 'string' or _.isArray data
+            [opts, data] = [data ? opts, null]
+        @options = _.clone(@DEFAULT_OPTIONS) import (opts or {})
+
+        @transforms = []
+        @labels = @options.labels or []
+        @types = @options.types or []
+
+        # Start from empty matrices so rebuildDerived() also works when there is
+        # no data, or when parse() does not fill them in (as the stub below).
+        @untransformedRows = []
+        @rows = []
+        @columns = []
+
+        @parse that if data or @options.data
+        @rebuildDerived()
+
+
+
+    /* * * * Parsing * * * */
+
+    /**
+     * Stub. Subclass and override to perform preprocessing of the data.
+     */
+    parse : (rawData) ->
+        this
+
+    /**
+     * Rebuilds the row-oriented data matrix from the columns.
+     */
+    rebuildData: ->
+        @rows = _.zip ...@columns
+        @rebuildDerived()
+
+    /**
+     * Rebuilds the column-oriented data matrix from the rows.
+     */
+    rebuildColumns: ->
+        @columns = _.zip ...@rows
+        @rebuildDerived()
+
+    /**
+     * Refreshes the state derived from `@columns`: pads `@transforms` to one
+     * stack per column and re-slices `@dateColumn` / `@dataColumns`.
+     */
+    rebuildDerived: ->
+        while @transforms.length < @columns.length
+            @transforms.push []
+        @dateColumn = @columns[0]
+        @dataColumns = @columns.slice(1)
+        this
+
+
+
+    /* * * * Data Transformation * * * */
+
+    /**
+     * Runs every registered transform over its column, then rebuilds the rows.
+     * @returns {this}
+     */
+    applyTransforms: ->
+        # Restart from the pristine rows when there are any; mapping over the
+        # current columns would re-run earlier transforms on their own output.
+        if @untransformedRows?.length
+            @columns = _.zip ...@untransformedRows
+        for fns, idx of @transforms
+            for fn of fns
+                @columns[idx] .= map fn, this
+        @rebuildData()
+
+    /**
+     * Drops all transforms and restores the untransformed rows.
+     * @returns {this}
+     */
+    clearTransforms: ->
+        @transforms = []
+        @rows = _.merge [], @untransformedRows
+        @rebuildColumns()
+
+    /**
+     * Map a function across the specified columns, one-by-one (in column-major
+     * order), replacing the data with the mapped result.
+     *
+     * @param {Number|Array} [indices] List one or more column indices to map. Negative
+     *  numbers are offset from the end of the columns list. May be left out
+     *  (`addTransform(fn, ctx)`) to map every column.
+     * @param {Function} fn Mapping function of the form:
+     *  `(single_value, row_idx, column) -> new_value`
+     * @param {Object} [ctx=this] Execution context.
+     * @returns {this}
+     */
+    addTransform: (indices, fn, ctx=this) ->
+        num_cols = @columns.length
+        # Support the `(fn, ctx)` call form. The test used to read the loop
+        # variable `idx`, still undefined at this point, so it never matched.
+        if typeof indices is 'function'
+            [ctx, fn, indices] = [fn, indices, null]
+        ctx ?= this
+        unless indices?
+            indices = _.range num_cols
+        unless _.isArray indices
+            indices = [indices]
+        # Bind the context now; applyTransforms() only has the stored function.
+        transform = -> fn.apply ctx, arguments
+        for idx of indices
+            idx %= num_cols
+            idx += num_cols if idx < 0
+            @transforms[idx].push transform
+        @applyTransforms()
+
+    /**
+     * Adds a transform to every column except the date column (index 0).
+     */
+    addDataTransform: (fn, ctx=this) ->
+        @addTransform _.range(1, @columns.length), fn, ctx
+
+
+
+    /* * * * Misc * * * */
+
+    # Serializes as `_.merge [], @rows` (presumably a copy of the current rows).
+    toJSON: ->
+        _.merge [], @rows
+
+    # Formats as `<constructor name>(<quoted, comma-separated labels>)`.
+    toString: ->
+        labels = @labels
+            .map -> "'#it'"
+            .join ', '
+        "#{@..name or @..displayName}(#labels)"
+
+
+
+module.exports = exports = TimeSeriesData
+
-- 
1.7.0.4