# [document-viewer residue, not part of the module] You are on page 1 of 8

import sys

import pprint
import argparse
from subprocess import Popen, PIPE, STDOUT
from os import path
from question import Question
from question import Questions
########################################################################
class Router(object):
    """
    The router collects all the input data and prepares it for parsing.

    Method map::

        start()
            load()
                setup()
                get_input()
                mogrify()
                parse()
                filter()
                <show stats>
            write()
    """

    # Properties
    # ------------------------------------------------------------------
    version = '0.1'
    develenv = ('Python 2.7.1+ (r271:86832, Apr 11 2011, 18:05:24) '
                '[GCC 4.5.2] on linux2')

    # Parsers trialled by _get_parser(), in the order they are run.
    _CANDIDATE_PARSERS = ('IndexParser', 'BlockParser', 'ChunkParser',
                          'QuestParser', 'StemsParser')
    # Preference order used when several parsers produce usable results.
    _PARSER_PRIORITY = ('QuestParser', 'ChunkParser', 'IndexParser',
                        'StemsParser', 'BlockParser')
    # BlockParser is never selected on question count alone.
    _COUNT_ONLY_PARSERS = _PARSER_PRIORITY[:-1]

    # Constructor
    # ------------------------------------------------------------------
    def __init__(self):
        self.questions = []     # parsed question list, set by parse()/filter()
        self.qhash = {}         # parser class name -> Questions from the trial run
        self.options = None     # argparse Namespace, set by setup()
        self.parser = None      # parser instance selected by parse()
        self.mogrifyers = []    # mogrifyer instances built by mogrify()
        self.filters = []       # filter instances built by filter()
        self.PrettyPrinter = pprint.PrettyPrinter(indent=4, width=72)

    # Magic methods
    # ------------------------------------------------------------------
    def __str__(self):
        """
        Return the statistics report printed by load() when --stats is set.

        The amount of detail follows ``self.options.stats``:
        above 1 the sections are pretty-printed instead of str()'d,
        above 2 the parser tokens are listed,
        below 4 each token is truncated to 72 characters (999 otherwise),
        above 4 every question is listed as well.

        Requires setup() to have been called (reads self.options).
        """
        options = self.options
        infile = options.inputfile
        level = options.stats

        fmt = self.PrettyPrinter.pformat if level > 1 else str
        tokens = self.parser.tokens if self.parser and level > 2 else []
        toklen = 72 if level < 4 else 999

        token_lines = [
            '%-80s token %2d' % (
                token[0:toklen] + ('...' if len(token) > toklen else ''),
                number,
            )
            for number, token in enumerate(tokens, 1)
        ]
        if level > 4:
            question_lines = ['%s question %d' % (question, number)
                              for number, question in enumerate(self.questions, 1)]
        else:
            question_lines = []

        # stdin has no size on disk, so the size field is left out for it
        if infile is not sys.stdin:
            size = ' size %d,' % path.getsize(infile.name)
        else:
            size = ''

        template = (
            '<%s.%s, questions=%d>\n'
            '%s\n'
            'input: %s, %s, mode %s,%s encoding %s, newlines %s\n'
            '%s\n'
            'mogrifyers: %s\n'
            'filters: %s\n'
            'parser: %s\n'
            'tokens: %s\n'
            '%s\n'
            '%s\n'
        )
        return template % (
            __name__,
            self.__class__.__name__,
            len(self.questions),
            # options
            fmt(vars(options)),
            # input
            infile.name,
            'closed' if infile.closed else 'open',
            infile.mode,
            size,
            infile.encoding,
            repr(infile.newlines),
            # qhash & formatters
            fmt(self.qhash),
            fmt(self.mogrifyers),
            fmt(self.filters),
            # parser
            fmt(str(self.parser)),
            fmt(token_lines),
            fmt({'questions': Questions(self.questions)}),
            '\n'.join(question_lines),
        )

    # Public methods
    # ------------------------------------------------------------------
    def setup(self, options):
        """
        Parse the command-line options and store them in self.options.

        The comma-separated ``-f`` and ``-m`` values are split into lists
        of stripped class names (empty list when the switch is absent).

        @param options list The argument strings (None makes argparse
                            read sys.argv[1:])

        >>> options = ['--qualify']
        >>> r = Router()
        >>> r.setup(options)
        >>> r.options
        Namespace... qualify=True...
        """
        # declare command-line argument parser
        command_line = argparse.ArgumentParser(
            description='Parses and tokenizes text.',
            epilog='Refer to the documentation for more detailed information.',
            prog=sys.argv[0],
        )

        # define the command-line arguments
        command_line.add_argument('-V', '--version', action='version',
                                  version='%(prog)s Router version ' + self.version +
                                          ' developed with ' + self.develenv,
                                  help='print the version information and exit')
        command_line.add_argument('-s', '--stats', nargs='?', metavar='SLVL',
                                  type=int, default=0, const=1,
                                  help='stats print level: 1, 2, 3, 4, 5 (5=most)')
        # e.g. "stem = ...."
        command_line.add_argument('-q', '--qualify', action='store_true',
                                  help='Qualify the output with the question parts')
        command_line.add_argument('-i', dest='inputfile', nargs='?', metavar='INFL',
                                  type=argparse.FileType('rU'), default=sys.stdin,
                                  help='input filename, def=stdin')
        command_line.add_argument('-o', dest='outputfile', metavar='OUFL', nargs='?',
                                  type=argparse.FileType('w'), default=sys.stdout,
                                  const='/dev/null',
                                  help='output filename, def=stdout, const=/dev/null')
        command_line.add_argument('-m', dest='mogrifyers', type=str, metavar='MGRFs',
                                  help='mogrifyer classes "M1, M2,... Mn"')
        command_line.add_argument('-p', dest='parser', type=str, metavar='PRSR',
                                  help='parser class')
        command_line.add_argument('-f', dest='filters', type=str, metavar='FLTRs',
                                  help='filterer classes "F1, F2,... Fn"')
        command_line.add_argument('-w', dest='writer', type=str, metavar='WRTR',
                                  help='writer class')
        command_line.add_argument('input', metavar='INPUT', type=str, nargs='?',
                                  help='input string')

        # load the commandline options
        self.options = command_line.parse_args(options)

        # 'foo , bar' ==>> ['foo', 'bar']
        self.options.filters = self._split_names(self.options.filters)
        self.options.mogrifyers = self._split_names(self.options.mogrifyers)

    def load(self, options=None):
        """
        The primary Router method to handle: setup, mogrifying, parsing and
        filtering.  Prints the statistics report when --stats is set.

        @param options list The argument strings; defaults to the
                            sys.argv[1:] in effect when load() is called

        >>> r = Router()
        >>> r.load(['''This is the stem
        ... This is an option'''])
        >>> len(r.questions)
        1
        """
        # Resolved per call: a default of sys.argv[1:] in the signature
        # would be frozen at class-definition time.
        if options is None:
            options = sys.argv[1:]
        self.setup(options)
        self.parse(self.mogrify(self.get_input()))
        self.filter()
        if self.options.stats:
            print(self)

    def start(self, options=None):
        """
        Loads input and writes output.

        @param options list The argument strings; defaults to the
                            sys.argv[1:] in effect when start() is called

        >>> r = Router()
        >>> r.start(['''This is the stem
        ... This is an option'''])
        This is the stem
        This is an option
        """
        self.load(options)
        self.write()

    def mogrify(self, string):
        """
        Load all mogrifiers specified on the command-line and apply them
        one at a time to the input string returning the mogrified string.

        A mogrifier takes an input string and applies some search and replace
        logic usually to massage the string into a different format: removing
        all non-printable characters, for example.

        @param string string The input string to mogrify
        @return string The mogrified string

        >>> from mogrifyer import BooleanoptionMogrifyer
        >>> r = Router()
        >>> r.setup(['-m', 'BooleanoptionMogrifyer'])
        >>> print(r.mogrify('''This is the stem
        ... yes no a. This is an option'''))
        This is the stem
        a. This is an option
        """
        self.mogrifyers = list(self._get_mogrifyers())
        for mogrifyer in self.mogrifyers:
            string = mogrifyer.mogrify(string)
        return string

    def parse(self, string):
        """
        The parsing is the heart of the router: _get_parser() selects and
        instantiates a parser, its parse() method is run on the string and
        the resulting question list is loaded into self.questions.

        An empty string is ignored.  An AttributeError raised while
        selecting or running the parser is reported on stderr rather than
        propagated (best effort).

        @param string string The mogrified input string

        >>> r = Router()
        >>> r.setup([])
        >>> r.parse('''This is the stem
        ... This is an option''')
        >>> assert len(r.questions) == 1
        >>> print(r.questions[0].stem)
        This is the stem
        """
        if not string:
            return
        try:
            self.parser = self._get_parser(string)
            self.parser.parse(string)
            self.questions = self.parser.questions
        except AttributeError as error:
            self._warn("Could not parse input.",
                       self.parser if self.parser else '',
                       error)

    def filter(self):
        """
        Load all filters specified on the command-line and apply them
        one at a time to the parsed question list.

        >>> r = Router()
        >>> r.setup(['-f', 'IndexFilter'])
        >>> r.parse('1. This is the stem')
        >>> print(r.questions[0].stem)
        1. This is the stem
        >>> r.filter()
        >>> print(r.questions[0].stem)
        This is the stem
        """
        self.filters = list(self._get_filters())
        for question_filter in self.filters:
            self.questions = question_filter.filter(self.questions)

    def write(self):
        """
        Instantiate the writer and have it write self.questions to the
        output file selected on the command line.

        A writer that cannot be resolved is reported on stderr and nothing
        is written.
        """
        try:
            writer = self._get_writer()
        except AttributeError as error:
            self._warn("Could not declare writer.", error)
            return
        if writer is None:
            self._warn("Could not declare writer.")
            return
        # NOTE: the output file is left open here; it may be sys.stdout.
        writer.write(self.options.outputfile, self.questions)

    def get_input(self, inputfile=None):
        """
        Return the input designated on the command line.

        A positional INPUT string takes precedence and any file input is
        then ignored.  Otherwise the given file, or the ``-i`` file
        (standard input by default), is read.

        @param inputfile File The open input file object
        @return string The input

        >>> r = Router()
        >>> r.setup(['''This is the stem
        ... This is an option'''])
        >>> print(r.get_input())
        This is the stem
        This is an option
        """
        if self.options and self.options.input:
            # note: any file input is ignored
            return self.options.input
        if not inputfile:
            inputfile = self.options.inputfile
        return self._read(inputfile)

    # Protected methods
    # ------------------------------------------------------------------
    def _read(self, inputfile):
        """
        A wrapper for inputfile.read() to trap for the PDF conversion.

        A file whose name ends in ``.pdf`` is converted by running the
        external ``pdftotext -raw`` command; its combined stdout/stderr
        is returned.  Any other file is read directly.

        @param inputfile File The open input file object
        @return string The text read or converted
        """
        if inputfile.name.endswith('.pdf'):
            command_line = ['pdftotext', '-raw', inputfile.name, '-']
            proc = Popen(command_line, stdout=PIPE, stderr=STDOUT)
            # stderr is merged into stdout above, so the second element
            # returned by communicate() is always None.
            out, _ = proc.communicate()
            return out
        if inputfile is sys.stdin:
            # prompt on stderr so it cannot end up in the (stdout) output
            sys.stderr.write('Enter input (ctrl-D on a blank line to end)\n')
        return inputfile.read()

    def _get_mogrifyers(self):
        """
        Yield an instance of every mogrifyer class named with ``-m``.

        A name missing from the ``mogrifyer`` module is reported on stderr
        and skipped.
        """
        for name in self.options.mogrifyers:
            try:
                Mogrifyer = self.__forname("mogrifyer", name)
            except AttributeError as error:
                self._warn('mogrifyer', error)
            else:
                yield Mogrifyer()

    def _get_parser(self, string=''):
        """
        This tries to use some rudimentary intelligence to determine which
        parser to choose based on how many questions it parses out giving
        extra weight to a uniform distribution of options.

        A parser named with ``-p`` is used unconditionally.

        @param string string The input string for the parsers to parse
        @return parser.Parser The selected parser instantiation

        >>> r = Router()
        >>> r.setup(['-p', 'IndexParser'])
        >>> r._get_parser()
        <parser.IndexParser object at...
        """
        if self.options.parser:
            return self.__forname("parser", self.options.parser)()

        # run all the parsers for the input string and load the results
        # into a hash.
        for parserclass in self._CANDIDATE_PARSERS:
            Parser = self.__forname("parser", parserclass)
            self.qhash[parserclass] = Questions(Parser().parse(string).questions)

        return self.__forname("parser", self._choose_parser())()

    def _choose_parser(self):
        """
        Pick a parser class name from the trial results in self.qhash.

        Only parsers that found more than one question are considered.
        In priority order (Quest, Chunk, Index, Stems, Block) the first
        ``ordered`` result wins, then the first ``symetrical`` result,
        then the first result at all (BlockParser excluded from this last
        tier).  'SingleParser' is the fallback.

        @return string The selected parser class name
        """
        for trait in ('ordered', 'symetrical'):
            for name in self._PARSER_PRIORITY:
                result = self.qhash[name]
                if result.length > 1 and getattr(result, trait):
                    return name
        for name in self._COUNT_ONLY_PARSERS:
            if self.qhash[name].length > 1:
                return name
        return 'SingleParser'

    def _get_filters(self):
        """
        Yield an instance of every filter class named with ``-f``, followed
        by a QualifiedFilter when ``-q`` was given.

        A name missing from the ``filter`` module is reported on stderr
        and skipped.
        """
        for name in self.options.filters:
            try:
                Filter = self.__forname("filter", name)
            except AttributeError as error:
                self._warn('filter', error)
            else:
                yield Filter()
        # -q command line switch is a shortcut for the QualifiedFilter filter
        if self.options.qualify:
            yield self.__forname("filter", 'QualifiedFilter')()

    def _get_writer(self):
        """
        Return an instance of the writer class named with ``-w``
        ('TextWriter' when none was given), or None if the looked-up
        attribute is falsy.
        """
        Writer = self.__forname("writer", self.options.writer or 'TextWriter')
        if Writer:
            return Writer()
        return None

    def _exit(self):
        """Terminate the program via sys.exit()."""
        sys.exit()

    def _warn(self, *parts):
        """Write the space-joined parts as one diagnostic line to stderr."""
        sys.stderr.write(' '.join(str(part) for part in parts) + '\n')

    @staticmethod
    def _split_names(value):
        """Split 'foo , bar' into ['foo', 'bar']; a falsy value gives []."""
        if not value:
            return []
        return [name.strip() for name in value.split(',')]

    # Private methods
    # ------------------------------------------------------------------
    def __forname(self, modname, classname):
        """
        Returns a class of "classname" from module "modname".

        reposted by ben snider
        from http://mail.python.org/pipermail/python-list/2003-March/192221.html
        on http://www.bensnider.com/2008/02/27/dynamically-import-and-instantiate-python-classes/

        @param modname string The name of the module to import
        @param classname string The attribute to fetch from that module
        @return class The looked-up class object
        @raise AttributeError when the module has no such attribute
        @raise ImportError when the module cannot be imported
        """
        module = __import__(modname)
        return getattr(module, classname)

# [document-viewer residue, not part of the module] You may also like