From 620cbfac6f29c3380d013b6c074d8dc47123a852 Mon Sep 17 00:00:00 2001 From: dsc Date: Wed, 21 Dec 2011 15:00:03 -0800 Subject: [PATCH] Bug fixes --- bin/find_haiku.py | 134 +++++++++++++++++++++++++++++------------------------ 1 files changed, 74 insertions(+), 60 deletions(-) diff --git a/bin/find_haiku.py b/bin/find_haiku.py index 1495746..7190f05 100755 --- a/bin/find_haiku.py +++ b/bin/find_haiku.py @@ -8,7 +8,8 @@ __copyright__ = 'Copyright (c) 2011 David Schoonover. All rights reserved.' __homepage__ = 'http://crisishaiku.com/' from crisishaiku import __version__, VERSION -import sys, re, argparse, codecs +import sys, re, codecs, time, argparse +from datetime import datetime from hyphen import Hyphenator from path import path @@ -75,15 +76,12 @@ class HaikuFinder(object): self.verbose = verbose self.var_dir = path(var_dir) - if not self.var_dir.exists(): self.var_dir.makedirs() self.out_dir = path(out_dir) + if not self.var_dir.exists(): self.var_dir.makedirs() if not self.out_dir.exists(): self.out_dir.makedirs() - if outfile is None: - outfile = OUTFILE_OVERLAP if self.overlap else OUTFILE_NO_OVERLAP - self.outfile = self.out_dir/outfile - else: - self.outfile = path(outfile) + outfile = OUTFILE_OVERLAP if self.overlap else OUTFILE_NO_OVERLAP + self.outfile = self.out_dir/outfile self.reportfile = reportfile self.cachefile = self.var_dir/SYLLABLE_FILE @@ -100,10 +98,13 @@ class HaikuFinder(object): def loadCache(self): "Load the syllable cache from disk." - if self.cachefile.exists(): - with cachefile.open('rb') as f: - cache = json.load(f) or {} - if cache and not self.__class__.cache: + cache = {} + try: + with self.cachefile.open('rb') as f: + cache = json.loads(f.read()) or {} + except ValueError: pass + if cache: + if not self.__class__.cache: self.__class__.cache = cache else: self.cache = cache @@ -112,19 +113,31 @@ class HaikuFinder(object): def saveCache(self): "Save the syllable cache to disk." with self.cachefile.open('wb') as f: - json.dump(self.cache, f) + f.write(json.dumps(self.cache)) + return self + + def load(self, statefile=None): + "Load search state." + self.loadCache() return self def save(self, statefile=None): - "Save the search state." + "Save search state." self.saveCache() if statefile is None: statefile = self.statefile FIELDS = 'words haikus seen_lines seen_words'.split() state = { k:v for k, v in self.__dict__.iteritems() if k in FIELDS } with codecs.open(statefile, 'w', 'utf-8') as f: - json.dump(state, f) + f.write(json.dumps(state)) return self + def run(self): + "Runs through the haiku-finder lifecycle." + self.process().writeResults().save() + + + + ### Report Processing def numSyllables(self, word): "Calculate number of syllables in `word`." @@ -194,7 +207,7 @@ class HaikuFinder(object): return haiku - def run(self, reportfile=None, start_line=None, limit=None): + def process(self, reportfile=None, start_line=None, limit=None): "Process the report." if reportfile is None: reportfile = self.reportfile if start_line is None: start_line = self.start_line @@ -215,15 +228,32 @@ class HaikuFinder(object): if False and self.seen_lines % 1000 == 0: print '-' * 20 - print '\nFound %s haiku in %s lines (%s words) so far, taking %ss...' % (len(self.haikus), self.seen_lines, self.seen_words, time.time()-start) - self.printHaiku(self.haikus[-1]) + print '\nFound %s haiku in %s lines (%s words) so far, taking %.3fs...' % (len(self.haikus), self.seen_lines, self.seen_words, time.time()-start) + self.writeHaiku(self.haikus[-1]) - print 'Done! Found %s haiku in %s lines (%s words), taking %ss' % (len(self.haikus), self.seen_lines, self.seen_words, time.time()-start) + print 'Done! Found %s haiku in %s lines (%s words), taking %.3fs' % (len(self.haikus), self.seen_lines, self.seen_words, time.time()-start) return self - def saveHaikus(self, outfile=None, dump_chains=False): - "Write all haikus to `outfile`." + + def writeHaiku(self, info, outfile=sys.stdout, header=True, wordlocs=False): + "Print haiku and metadata to `outfile`." + haiku, linenum, start_words, end_words = info + if header: + wordloc = '' + if wordlocs and start_words and end_words: + wordloc = ' (words %s to %s)' % (start_words, end_words) + outfile.write('On line %s%s:\n' % (linenum, wordloc)) + lines = [ ' '.join(stanza) for stanza in haiku ] + # lines = [ ' '.join( '%s[%s]' % (word, self.numSyllables(word)) for word in stanza ) for stanza in haiku ] + for line in lines: + outfile.write(u' {line: ^80}'.format(line=line).rstrip()+'\n') + outfile.write('\n') + return self + + def writeResults(self, outfile=None, dump_chains=False): + "Write result data to `outfile` (derived from `out_dir` by default)." + if outfile is None: outfile = self.outfile print 'Saving %s haiku to %s...' % (len(self.haikus), outfile) @@ -232,7 +262,7 @@ class HaikuFinder(object): chain = [] with codecs.open(outfile, 'w', 'utf-8') as out: - out.write('Found %s haiku...\n\n' % len(self.haikus)) + out.write('[Found %s Haiku | %s]\n\n' % (len(self.haikus), datetime.now())) for info in self.haikus: haiku, linenum, start_words, end_words = info @@ -245,7 +275,7 @@ class HaikuFinder(object): chains.append(chain) last_wc = end_words - self.printHaiku(info, header=header, outfile=out) + self.writeHaiku(info, header=header, outfile=out) if dump_chains: print '\nLongest Chains of Haikus:' @@ -253,25 +283,9 @@ class HaikuFinder(object): for (length, chain) in chains[:10]: print ('- ' * 40) + '\n' for haiku in chain: - self.printHaiku((haiku, 0, 0, 0), header=False) + self.writeHaiku((haiku, 0, 0, 0), header=False) + print '- ' * 40 - print '- ' * 40 - print '\nDone!' - return self - - def printHaiku(self, info, outfile=sys.stdout, header=True, wordlocs=False): - "Print haiku and metadata to `outfile`." - haiku, linenum, start_words, end_words = info - if header: - wordloc = '' - if wordlocs and start_words and end_words: - wordloc = ' (words %s to %s)' % (start_words, end_words) - outfile.write('On line %s%s:\n' % (linenum, wordloc)) - lines = [ ' '.join(stanza) for stanza in haiku ] - # lines = [ ' '.join( '%s[%s]' % (word, self.numSyllables(word)) for word in stanza ) for stanza in haiku ] - for line in lines: - outfile.write(u' {line: ^80}'.format(line=line).rstrip()+'\n') - outfile.write('\n') return self def printHaikus(self): @@ -279,7 +293,7 @@ class HaikuFinder(object): print '-' * 20 print '\nFound %s haiku so far (line %s)...' % (len(self.haikus), self.seen_lines) for info in self.haikus: - self.printHaiku(info) + self.writeHaiku(info) print return self @@ -289,25 +303,25 @@ class HaikuFinder(object): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument('--version', action='version', version=__version__) - parser.add_argument("-v", "--verbose", action="store_true", default=verbose, - help="Verbose logging.") - - parser.add_argument("-s", "--start-line", type=int, default=start_line, - help="Line in FCIR to start haiku processing. [default: %(default)s]") - parser.add_argument("-l", "--limit", type=int, default=limit, - help="Stop processing after finding this many haiku.") - parser.add_argument("-o", "--overlap", action="store_true", dest="overlap", default=overlap, - help="Allow haiku text to overlap in FCIR. [Default: %(default)s]") - parser.add_argument("-O", "--no-overlap", action="store_false", dest="overlap", - help="Do not allow haiku text to overlap in FCIR. [Default: %(default)s]") - - parser.add_argument("-d", "--var-dir", type=path, default=var_dir, - help="Working directory for state files. [default: %(default)s]") - parser.add_argument("--report-file", type=path, dest="reportfile", default=reportfile, - help="Path to Financial Crisis Inquiry Report plaintext file. [default: %(default)s]") - - parser.add_argument("out-dir", nargs='?', type=path, default=out_dir, - help="Directory to write result files. [default: %(default)s]") + parser.add_argument('-v', '--verbose', action='store_true', default=verbose, + help='Verbose logging.') + + parser.add_argument('-s', '--start-line', type=int, default=start_line, + help='Line in FCIR to start haiku processing. [default: %(default)s]') + parser.add_argument('-l', '--limit', type=int, default=limit, + help='Stop processing after finding this many haiku.') + parser.add_argument('-o', '--overlap', action='store_true', dest='overlap', default=overlap, + help='Allow haiku text to overlap in FCIR. [Default: %(default)s]') + parser.add_argument('-O', '--no-overlap', action='store_false', dest='overlap', + help='Do not allow haiku text to overlap in FCIR.') + + parser.add_argument('-d', '--var-dir', type=path, default=var_dir, + help='Working directory for state files. [default: %(default)s]') + parser.add_argument('--report-file', type=path, dest='reportfile', default=reportfile, + help='Path to Financial Crisis Inquiry Report plaintext file. [default: %(default)s]') + + parser.add_argument('out_dir', nargs='?', type=path, default=out_dir, + help='Directory to write result files. [default: %(default)s]') @classmethod -- 1.7.0.4