__homepage__ = 'http://crisishaiku.com/'
from crisishaiku import __version__, VERSION
-import sys, re, argparse, codecs
+import sys, re, codecs, time, argparse
+from datetime import datetime
from hyphen import Hyphenator
from path import path
self.verbose = verbose
self.var_dir = path(var_dir)
- if not self.var_dir.exists(): self.var_dir.makedirs()
self.out_dir = path(out_dir)
+ if not self.var_dir.exists(): self.var_dir.makedirs()
if not self.out_dir.exists(): self.out_dir.makedirs()
- if outfile is None:
- outfile = OUTFILE_OVERLAP if self.overlap else OUTFILE_NO_OVERLAP
- self.outfile = self.out_dir/outfile
- else:
- self.outfile = path(outfile)
+ outfile = OUTFILE_OVERLAP if self.overlap else OUTFILE_NO_OVERLAP
+ self.outfile = self.out_dir/outfile
self.reportfile = reportfile
self.cachefile = self.var_dir/SYLLABLE_FILE
def loadCache(self):
"Load the syllable cache from disk."
- if self.cachefile.exists():
- with cachefile.open('rb') as f:
- cache = json.load(f) or {}
- if cache and not self.__class__.cache:
+ cache = {}
+ try:
+ with self.cachefile.open('rb') as f:
+ cache = json.loads(f.read()) or {}
+ except (IOError, ValueError): pass
+ if cache:
+ if not self.__class__.cache:
self.__class__.cache = cache
else:
self.cache = cache
def saveCache(self):
"Save the syllable cache to disk."
with self.cachefile.open('wb') as f:
- json.dump(self.cache, f)
+ f.write(json.dumps(self.cache))
+ return self
+
+ def load(self, statefile=None):
+ "Load search state."
+ self.loadCache()
return self
def save(self, statefile=None):
- "Save the search state."
+ "Save search state."
self.saveCache()
if statefile is None: statefile = self.statefile
FIELDS = 'words haikus seen_lines seen_words'.split()
state = { k:v for k, v in self.__dict__.iteritems() if k in FIELDS }
with codecs.open(statefile, 'w', 'utf-8') as f:
- json.dump(state, f)
+ f.write(json.dumps(state))
return self
+ def run(self):
+ "Run through the haiku-finder lifecycle: process, write results, save state."
+ return self.process().writeResults().save()
+
+
+
+ ### Report Processing
def numSyllables(self, word):
"Calculate number of syllables in `word`."
return haiku
- def run(self, reportfile=None, start_line=None, limit=None):
+ def process(self, reportfile=None, start_line=None, limit=None):
"Process the report."
if reportfile is None: reportfile = self.reportfile
if start_line is None: start_line = self.start_line
if False and self.seen_lines % 1000 == 0:
print '-' * 20
- print '\nFound %s haiku in %s lines (%s words) so far, taking %ss...' % (len(self.haikus), self.seen_lines, self.seen_words, time.time()-start)
- self.printHaiku(self.haikus[-1])
+ print '\nFound %s haiku in %s lines (%s words) so far, taking %.3fs...' % (len(self.haikus), self.seen_lines, self.seen_words, time.time()-start)
+ self.writeHaiku(self.haikus[-1])
- print 'Done! Found %s haiku in %s lines (%s words), taking %ss' % (len(self.haikus), self.seen_lines, self.seen_words, time.time()-start)
+ print 'Done! Found %s haiku in %s lines (%s words), taking %.3fs' % (len(self.haikus), self.seen_lines, self.seen_words, time.time()-start)
return self
- def saveHaikus(self, outfile=None, dump_chains=False):
- "Write all haikus to `outfile`."
+
+ def writeHaiku(self, info, outfile=sys.stdout, header=True, wordlocs=False):
+ "Print haiku and metadata to `outfile`."
+ haiku, linenum, start_words, end_words = info
+ if header:
+ wordloc = ''
+ if wordlocs and start_words and end_words:
+ wordloc = ' (words %s to %s)' % (start_words, end_words)
+ outfile.write('On line %s%s:\n' % (linenum, wordloc))
+ lines = [ ' '.join(stanza) for stanza in haiku ]
+ # lines = [ ' '.join( '%s[%s]' % (word, self.numSyllables(word)) for word in stanza ) for stanza in haiku ]
+ for line in lines:
+ outfile.write(u' {line: ^80}'.format(line=line).rstrip()+'\n')
+ outfile.write('\n')
+ return self
+
+ def writeResults(self, outfile=None, dump_chains=False):
+ "Write result data to `outfile` (derived from `out_dir` by default)."
+
if outfile is None: outfile = self.outfile
print 'Saving %s haiku to %s...' % (len(self.haikus), outfile)
chain = []
with codecs.open(outfile, 'w', 'utf-8') as out:
- out.write('Found %s haiku...\n\n' % len(self.haikus))
+ out.write('[Found %s Haiku | %s]\n\n' % (len(self.haikus), datetime.now()))
for info in self.haikus:
haiku, linenum, start_words, end_words = info
chains.append(chain)
last_wc = end_words
- self.printHaiku(info, header=header, outfile=out)
+ self.writeHaiku(info, header=header, outfile=out)
if dump_chains:
print '\nLongest Chains of Haikus:'
for (length, chain) in chains[:10]:
print ('- ' * 40) + '\n'
for haiku in chain:
- self.printHaiku((haiku, 0, 0, 0), header=False)
+ self.writeHaiku((haiku, 0, 0, 0), header=False)
+ print '- ' * 40
- print '- ' * 40
- print '\nDone!'
- return self
-
- def printHaiku(self, info, outfile=sys.stdout, header=True, wordlocs=False):
- "Print haiku and metadata to `outfile`."
- haiku, linenum, start_words, end_words = info
- if header:
- wordloc = ''
- if wordlocs and start_words and end_words:
- wordloc = ' (words %s to %s)' % (start_words, end_words)
- outfile.write('On line %s%s:\n' % (linenum, wordloc))
- lines = [ ' '.join(stanza) for stanza in haiku ]
- # lines = [ ' '.join( '%s[%s]' % (word, self.numSyllables(word)) for word in stanza ) for stanza in haiku ]
- for line in lines:
- outfile.write(u' {line: ^80}'.format(line=line).rstrip()+'\n')
- outfile.write('\n')
return self
def printHaikus(self):
print '-' * 20
print '\nFound %s haiku so far (line %s)...' % (len(self.haikus), self.seen_lines)
for info in self.haikus:
- self.printHaiku(info)
+ self.writeHaiku(info)
print
return self
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('--version', action='version', version=__version__)
- parser.add_argument("-v", "--verbose", action="store_true", default=verbose,
- help="Verbose logging.")
-
- parser.add_argument("-s", "--start-line", type=int, default=start_line,
- help="Line in FCIR to start haiku processing. [default: %(default)s]")
- parser.add_argument("-l", "--limit", type=int, default=limit,
- help="Stop processing after finding this many haiku.")
- parser.add_argument("-o", "--overlap", action="store_true", dest="overlap", default=overlap,
- help="Allow haiku text to overlap in FCIR. [Default: %(default)s]")
- parser.add_argument("-O", "--no-overlap", action="store_false", dest="overlap",
- help="Do not allow haiku text to overlap in FCIR. [Default: %(default)s]")
-
- parser.add_argument("-d", "--var-dir", type=path, default=var_dir,
- help="Working directory for state files. [default: %(default)s]")
- parser.add_argument("--report-file", type=path, dest="reportfile", default=reportfile,
- help="Path to Financial Crisis Inquiry Report plaintext file. [default: %(default)s]")
-
- parser.add_argument("out-dir", nargs='?', type=path, default=out_dir,
- help="Directory to write result files. [default: %(default)s]")
+ parser.add_argument('-v', '--verbose', action='store_true', default=verbose,
+ help='Verbose logging.')
+
+ parser.add_argument('-s', '--start-line', type=int, default=start_line,
+ help='Line in FCIR to start haiku processing. [default: %(default)s]')
+ parser.add_argument('-l', '--limit', type=int, default=limit,
+ help='Stop processing after finding this many haiku.')
+ parser.add_argument('-o', '--overlap', action='store_true', dest='overlap', default=overlap,
+ help='Allow haiku text to overlap in FCIR. [Default: %(default)s]')
+ parser.add_argument('-O', '--no-overlap', action='store_false', dest='overlap',
+ help='Do not allow haiku text to overlap in FCIR.')
+
+ parser.add_argument('-d', '--var-dir', type=path, default=var_dir,
+ help='Working directory for state files. [default: %(default)s]')
+ parser.add_argument('--report-file', type=path, dest='reportfile', default=reportfile,
+ help='Path to Financial Crisis Inquiry Report plaintext file. [default: %(default)s]')
+
+ parser.add_argument('out_dir', nargs='?', type=path, default=out_dir,
+ help='Directory to write result files. [default: %(default)s]')
@classmethod