In [1]:
from urllib import urlretrieve
import csv
import datetime
from io import BytesIO
import numpy as np
import json
import copy
import random

import uuid
from IPython.display import HTML

from nupic.encoders import MultiEncoder
import nupic.research.TP
In [2]:
%%javascript
// Register require.js module paths for the chart code used below:
// d3 v3 from d3js.org, and the custom segment-stories visualization
// (loaded under the module name "hello").
require.config({
        paths: {
            d3: '//d3js.org/d3.v3.min',
            hello: "//mrcslws.com/stuff/segment-stories.2016.04.28"
  }
});
In [3]:
def drawEverything(columnStatesChartBuilder, segmentLifetimesChartBuilder):
    """Render both charts into a uniquely-identified <div> via require.js.

    columnStatesChartBuilder exposes ``output`` (file-like; getvalue() is
    CSV text) and segmentLifetimesChartBuilder exposes ``getOutput()``
    (JSON text). Both payloads are embedded inline as single-quoted
    JavaScript string literals.

    Returns an IPython HTML object that draws the charts when displayed.
    """
    elementId = str(uuid.uuid1())

    def escapeForJsString(text):
        # The payload goes inside a single-quoted JS string literal.
        # Backslashes must be escaped FIRST (otherwise the escapes added
        # below would themselves get doubled, and backslashes already in
        # the data -- e.g. JSON escape sequences -- would be corrupted
        # when the JS engine decodes the literal). Single quotes would
        # terminate the literal, so they are escaped too.
        return (text.replace('\\', '\\\\')
                    .replace("'", "\\'")
                    .replace('\r', '\\r')
                    .replace('\n', '\\n'))

    addChart = """
    <div id="%s" style="-webkit-touch-callout: none; -webkit-user-select: none; -moz-user-select: none;
    -ms-user-select: none; user-select: none;"></div>
    <script>
    require(['hello', 'd3'], function() {
        var myContainer = zoomableTimeSeries(document.getElementById('%s'));
        insertColumnStatesAndSegmentLifetimes(myContainer, '%s', '%s');
    });
    </script>
    """ % (elementId, elementId,
           escapeForJsString(columnStatesChartBuilder.output.getvalue()),
           escapeForJsString(segmentLifetimesChartBuilder.getOutput()))

    return HTML(addChart)
In [4]:
def parseHotgym(hotgymRaw):
    """Parse the hot gym CSV into a list of row dicts.

    hotgymRaw: an iterable of CSV lines (e.g. an open file). The first
    three rows are nupic-style headers (field names, types, flags) and
    are skipped.

    Returns a list of {'timestamp': datetime, 'consumption': float}
    dicts, in file order.
    """
    csvReader = csv.reader(hotgymRaw)

    # Skip the three header rows. The builtin next() works on both
    # Python 2 and 3, unlike the reader's Py2-only .next() method.
    for _ in range(3):
        next(csvReader)

    return [
        {
            'timestamp': datetime.datetime.strptime(timestampStr,
                                                    "%m/%d/%y %H:%M"),
            'consumption': float(consumptionStr),
        }
        for timestampStr, consumptionStr in csvReader
    ]

def encodeHotgym(hotgym):
    """Encode each parsed hot gym row into a binary array.

    Combines a scalar encoding of consumption with time-of-day and
    weekend date encodings via a MultiEncoder. Returns a list with one
    encoding per input row.
    """
    fieldEncodings = {
        'consumption': {
            'fieldname': 'consumption',
            'name': 'consumption',
            'type': 'ScalarEncoder',
            'minval': 5.0,
            'maxval': 55.0,
            'clipInput': True,
            'n': 478,
            'w': 31,
        },
        'timestamp_timeOfDay': {
            'fieldname': 'timestamp',
            'name': 'timestamp_timeOfDay',
            'timeOfDay': (21, 1),
            'type': 'DateEncoder'
        },
        'timestamp_weekend': {
            'fieldname': 'timestamp',
            'name': 'timestamp_weekend',
            'type': 'DateEncoder',
            'weekend': 21
        },
    }
    encoder = MultiEncoder(fieldEncodings)

    return [encoder.encode(row) for row in hotgym]
In [5]:
# Fetch the hot gym dataset and encode every row up front. Only the first
# 1000 rows are used by the experiments below.
localPath = urlretrieve("http://mrcslws.com/stuff/rec-center-hourly.csv")[0]
with open(localPath, 'r') as hotgym:
    HOTGYM_ENCODED_ALL = encodeHotgym(parseHotgym(hotgym))
    # For each encoding, precompute the set of active (nonzero) bit indices.
    HOTGYM_INDICES_ALL = [set(encoding.nonzero()[0])
                          for encoding in HOTGYM_ENCODED_ALL]

    HOTGYM_ENCODED = HOTGYM_ENCODED_ALL[:1000]
    HOTGYM_INDICES = HOTGYM_INDICES_ALL[:1000]

Here I've tuned the encoders to produce sparse outputs and to use every bit.

In [6]:
len(HOTGYM_ENCODED[0].nonzero()[0])
Out[6]:
73
In [7]:
# Sparsity
len(HOTGYM_ENCODED[0].nonzero()[0]) / float(len(HOTGYM_ENCODED[0]))
Out[7]:
0.0712890625
In [8]:
# Number of columns
len(HOTGYM_ENCODED[0])
Out[8]:
1024
In [9]:
# Show how often each bit is used across all encodings.
np.set_printoptions(threshold=1024)
# Left-fold with + starting from a zeroed uint32 array; equivalent to the
# reduce() form but also valid on Python 3.
sum(HOTGYM_ENCODED, np.zeros(1024, dtype='uint32'))
Out[9]:
array([140, 157, 186, 208, 225, 228, 228, 228, 229, 230, 231, 231, 231,
       231, 232, 232, 233, 234, 235, 235, 235, 235, 236, 236, 237, 238,
       239, 239, 239, 240, 242, 102,  86,  57,  35,  18,  15,  16,  20,
        21,  21,  22,  24,  24,  25,  24,  24,  23,  23,  22,  22,  25,
        25,  24,  24,  24,  25,  24,  25,  26,  26,  25,  26,  26,  26,
        26,  26,  26,  26,  23,  22,  22,  21,  19,  20,  19,  22,  23,
        24,  24,  25,  29,  27,  27,  28,  28,  28,  27,  28,  30,  29,
        29,  28,  28,  28,  30,  33,  35,  36,  37,  39,  39,  39,  38,
        38,  39,  41,  39,  40,  39,  39,  38,  35,  37,  39,  40,  40,
        39,  40,  39,  36,  36,  38,  40,  40,  39,  37,  35,  33,  32,
        32,  30,  30,  31,  32,  33,  31,  30,  31,  33,  33,  33,  34,
        34,  32,  33,  32,  36,  39,  41,  41,  42,  47,  46,  53,  56,
        63,  66,  66,  67,  71,  69,  68,  67,  67,  69,  68,  69,  68,
        66,  62,  62,  62,  61,  61,  60,  57,  56,  52,  50,  46,  47,
        47,  42,  40,  31,  27,  20,  17,  17,  17,  13,  13,  13,  13,
        11,   8,   8,   7,   7,   7,   7,   7,   6,   6,   5,   6,   6,
         6,   7,   6,   7,   6,   5,   6,   7,   7,   7,   7,   7,   6,
         5,   6,   6,   6,   8,   9,   9,   9,  11,  11,  11,  11,  11,
        12,  12,  15,  15,  15,  15,  17,  18,  17,  18,  18,  17,  17,
        17,  17,  17,  17,  17,  17,  16,  18,  18,  16,  15,  15,  15,
        13,  13,  13,  14,  14,  14,  14,  12,  11,  11,  12,  10,   9,
        10,  10,  13,  14,  13,  13,  15,  17,  18,  23,  28,  32,  35,
        39,  44,  53,  56,  62,  74,  76,  79,  83,  87,  92, 101, 105,
       117, 125, 133, 138, 145, 155, 162, 162, 165, 174, 182, 188, 198,
       204, 207, 212, 215, 218, 216, 218, 217, 220, 220, 217, 219, 222,
       229, 231, 232, 228, 226, 217, 217, 210, 216, 212, 203, 196, 198,
       198, 195, 191, 185, 183, 179, 177, 174, 173, 169, 173, 169, 169,
       166, 163, 156, 156, 155, 148, 142, 139, 138, 138, 139, 135, 134,
       124, 126, 128, 132, 128, 126, 120, 120, 121, 113, 114, 111, 106,
       102,  99,  94,  94,  88,  85,  82,  82,  78,  74,  71,  72,  69,
        66,  64,  62,  58,  57,  56,  52,  49,  46,  45,  43,  44,  41,
        39,  37,  35,  32,  31,  29,  31,  30,  27,  25,  25,  25,  23,
        23,  22,  20,  19,  19,  18,  17,  15,  15,  15,  14,  13,  12,
        10,  10,  10,   9,   8,   7,   7,   5,   5,   4,   4,   1,   1,
         1,   1,   1,   1,   1,   3,   3,   3,   3,   2,   2,   2,   2,
         2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,
         2,   2,   2,   2,   2,   2,   2,   2,   2,   2,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  41,  41,
        41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,
        41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,
        41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,
        41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,
        41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,
        41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,
        41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,
        41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,
        41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,
        41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,
        41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,
        41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  41,
        41,  41,  41,  41,  41,  41,  41,  41,  41,  41,  42,  42,  42,
        42,  42,  42,  42,  42,  42,  42, 682, 682, 682, 682, 682, 682,
       682, 682, 682, 682, 682, 682, 682, 682, 682, 682, 682, 682, 682,
       682, 682, 318, 318, 318, 318, 318, 318, 318, 318, 318, 318, 318,
       318, 318, 318, 318, 318, 318, 318, 318, 318, 318], dtype=uint32)

Experiment: TP.py

In [10]:
class ColumnStatesPatcher(object):
    """Wraps a TP's compute() to log per-timestep column prediction stats.

    Each call to the patched compute() appends one CSV row to self.output:
    counts of unpredicted-active, predicted-inactive, and predicted-active
    columns for that timestep.
    """

    def __init__(self):
        # CSV text accumulates here; read it back with self.output.getvalue().
        self.output = BytesIO()

    def patchTP(self, tp):
        """Monkey-patch tp.compute. Writes the CSV header row immediately."""
        csvOutput = csv.writer(self.output)

        csvOutput.writerow([
            'n-unpredicted-active-columns',
            'n-predicted-inactive-columns',
            'n-predicted-active-columns',
        ])

        computeMethod = tp.compute

        def myCompute(bottomUpInput, **kwargs):
            activeColumns = set(bottomUpInput.nonzero()[0])

            # Predictions made on the PREVIOUS timestep, captured before
            # compute() overwrites the predicted state.
            npPredictedCells = tp.getPredictedState().reshape(-1).nonzero()[0]
            # Integer (floor) division maps a flat cell index to its column.
            predictedColumns = set(cell // tp.cellsPerColumn
                                   for cell in npPredictedCells.tolist())

            computeResult = computeMethod(bottomUpInput, **kwargs)

            csvOutput.writerow((
                len(activeColumns - predictedColumns),
                len(predictedColumns - activeColumns),
                len(activeColumns & predictedColumns),
            ))

            return computeResult

        tp.compute = myCompute
In [11]:
class SegmentLifetimeChartBuilderTP(object):
    """Records segment lifetime events from a TP for charting.

    Hooks the TP's segment callbacks (create / active / correct-match /
    destroy) and wraps compute() so that, each timestep, every segment
    that was active on the PREVIOUS timestep gets judged: a 'correct'
    activation if its cell became active, 'incorrect' otherwise.
    getOutput() serializes everything as JSON for the chart code.
    """

    def __init__(self):
        # segID => data dict for segments that are currently alive
        self.segments = {}
        # List of (segID, data) tuples -- a list rather than a dict because
        # a segID might be destroyed (and recreated) multiple times.
        self.destroyedSegments = []
        self.timestep = 0

        # segIDs that fired on the previous / current timestep.
        self.prevActiveSegments = []
        self.activeSegments = []

    def _findSegmentData(self, k):
        """Look up a segment's data by segID: live segments first, then the
        most recently destroyed entry with that id. Returns None if absent."""
        if k in self.segments:
            return self.segments[k]
        for k2, data in reversed(self.destroyedSegments):
            if k2 == k:
                return data
        return None

    @staticmethod
    def _outputSegment(data):
        """Build one JSON-ready dict, omitting empty event lists to keep the
        serialized output small. Destroyed segments carry a 'deathstep'."""
        out = {'birthstep': data['birthstep']}
        if 'deathstep' in data:
            out['deathstep'] = data['deathstep']
        for key in ('correctMatches', 'correctActivations',
                    'incorrectActivations'):
            if len(data[key]) > 0:
                out[key] = data[key]
        return out

    def getOutput(self):
        """Serialize all segments -- destroyed and still-alive -- as JSON,
        ordered by birth timestep."""
        outputSegments = [self._outputSegment(data)
                          for _, data in self.destroyedSegments]
        outputSegments.extend(self._outputSegment(data)
                              for _, data in self.segments.items())
        outputSegments.sort(key=lambda x: x['birthstep'])
        return json.dumps({
            'nTimesteps': self.timestep,
            'segments': outputSegments,
        })

    def patchTP(self, tp):
        """Install event hooks on `tp` and wrap its compute()."""

        def onCorrectMatchingSegment(segment):
            segmentData = self._findSegmentData(segment.segID)
            assert segmentData is not None
            segmentData['correctMatches'].append(self.timestep)
        tp.onCorrectMatchingSegment = onCorrectMatchingSegment

        def onActiveSegment(segment):
            self.activeSegments.append(segment.segID)
        tp.onActiveSegment = onActiveSegment

        def onDestroySegment(segment):
            k = segment.segID
            data = self.segments.pop(k)
            data['deathstep'] = self.timestep
            self.destroyedSegments.append((k, data))
        tp.onDestroySegment = onDestroySegment

        def onCreateSegment(c, i, segment):
            k = segment.segID
            assert k not in self.segments
            self.segments[k] = {
                # Flat cell index = column * cellsPerColumn + cell-in-column.
                'cell': c * tp.cellsPerColumn + i,
                'birthstep': self.timestep,
                'correctMatches': [],
                'correctActivations': [],
                'incorrectActivations': [],
            }
        tp.onCreateSegment = onCreateSegment

        compute = tp.compute
        def myCompute(bottomUpInput, **kwargs):
            self.beforeCompute(tp)
            compute(bottomUpInput, **kwargs)
            self.afterCompute(tp)
        tp.compute = myCompute

    def beforeCompute(self, tp):
        # Snapshot which segments were active last timestep; their
        # predictions get judged in afterCompute.
        self.prevActiveSegments = self.activeSegments
        self.activeSegments = []

    def afterCompute(self, tp):
        if self.prevActiveSegments:
            # Hoisted out of the loop: the active-cell set is identical for
            # every previously-active segment this timestep.
            activeCells = set(tp.getActiveState().nonzero()[0].tolist())
            for k in self.prevActiveSegments:
                segmentData = self._findSegmentData(k)
                assert segmentData is not None

                if segmentData['cell'] in activeCells:
                    segmentData['correctActivations'].append(self.timestep)
                else:
                    segmentData['incorrectActivations'].append(self.timestep)

        self.timestep += 1
In [12]:
experiment1_column_states = ColumnStatesPatcher()
experiment1_segment_lifetimes = SegmentLifetimeChartBuilderTP()

def experiment1(tp=None):
    """Run the hot gym encodings through a TP with learning enabled.

    If tp is None, a fresh TP is constructed (backtracking disabled) and
    the module-level chart patchers are attached to it; otherwise the
    given TP is reused as-is, so the same instance can be fed repeatedly.

    Returns the TP, so it can be passed back in for further runs.
    """
    if tp is None:
        tp = nupic.research.TP.TP(
                numberOfCols=1024,
                cellsPerColumn=4,
                initialPerm=0.21,
                connectedPerm=0.50,
                minThreshold=10,
                newSynapseCount=20,
                permanenceInc=0.10,
                permanenceDec=0.10,
                globalDecay=0.0,
                maxAge=0,
                activationThreshold=13,
                maxSegmentsPerCell=4,
                maxSynapsesPerSegment=255,
                maxInfBacktrack=0,
                maxLrnBacktrack=0,
                maxSeqLength=0,
                seed=42)

        experiment1_column_states.patchTP(tp)
        experiment1_segment_lifetimes.patchTP(tp)

    for i, encoding in enumerate(HOTGYM_ENCODED, start=1):
        if i % 100 == 0:
            # Parenthesized print is identical on Python 2 and valid on 3.
            print("timestep %d" % i)
        tp.compute(encoding, enableLearn=True, computeInfOutput=True)

    return tp
In [13]:
e1_tp = experiment1()
timestep 100
timestep 200
timestep 300
timestep 400
timestep 500
timestep 600
timestep 700
timestep 800
timestep 900
timestep 1000
In [14]:
drawEverything(experiment1_column_states, experiment1_segment_lifetimes)
Out[14]:
In [15]:
# Feed 1000 random SDRs (73 of 1024 bits on, matching the hot gym encoding
# sparsity) into the trained TP to see how it reacts to unpredictable input.
for i in xrange(1000):
    if i % 100 == 0:
        print("timestep %d" % i)

    activeColumns = random.sample(xrange(1024), 73)
    encoding = np.zeros(1024, dtype='uint32')
    encoding[activeColumns] = 1

    e1_tp.compute(encoding, enableLearn=True, computeInfOutput=True)
timestep 0
timestep 100
timestep 200
timestep 300
timestep 400
timestep 500
timestep 600
timestep 700
timestep 800
timestep 900
In [16]:
drawEverything(experiment1_column_states, experiment1_segment_lifetimes)
Out[16]:
In [17]:
experiment1(e1_tp)
timestep 100
timestep 200
timestep 300
timestep 400
timestep 500
timestep 600
timestep 700
timestep 800
timestep 900
timestep 1000
Out[17]:
<nupic.research.TP.TP at 0x110d1a190>
In [18]:
drawEverything(experiment1_column_states, experiment1_segment_lifetimes)
Out[18]:
In [19]:
# Persist the accumulated column-state CSV so the chart data can be reused.
with open('2016.05.03.experiment1_column_states.csv', 'w') as f:
    f.write(experiment1_column_states.output.getvalue())
In [20]:
# Persist the segment-lifetime JSON alongside the CSV above.
with open('2016.05.03.experiment1_segment_lifetimes.json', 'w') as f:
    f.write(experiment1_segment_lifetimes.getOutput())
In [6]:
# NOTE(review): this cell duplicates the TP parameters from experiment1()
# (only the argument order differs), and its execution count (In [6]) is out
# of sequence with the cells above -- it was run after a kernel restart or
# re-execution. The `tp` it builds is not patched or used elsewhere in view.
tp = nupic.research.TP.TP(
        maxInfBacktrack=0,
        maxLrnBacktrack=0,
        maxSeqLength=0,
        numberOfCols=1024,
        cellsPerColumn=4,
        initialPerm=0.21,
        connectedPerm=0.50,
        minThreshold=10,
        newSynapseCount=20,
        permanenceInc=0.10,
        permanenceDec=0.10,
        globalDecay=0.0,
        maxAge=0,
        activationThreshold=13,
        maxSegmentsPerCell=4,
        maxSynapsesPerSegment=255,
        seed=42)