from collections import Counter
import pydoop.mapreduce.api as api
import pydoop.mapreduce.pipes as pp
from pydoop.avrolib import AvroContext
class Mapper(api.Mapper):
    """Map each input record to a per-office, single-color tally."""

    def map(self, ctx):
        """Emit ``(office, Counter({color: 1}))`` for one input record.

        The record is read from ``ctx.value`` and is indexed by the
        ``'favorite_color'`` and ``'office'`` keys. Records whose
        favorite color is ``None`` produce no output.
        """
        record = ctx.value
        favorite = record['favorite_color']
        if favorite is None:
            # No preference recorded: nothing to count for this record.
            return
        tally = Counter({favorite: 1})
        ctx.emit(record['office'], tally)
class Reducer(api.Reducer):
    """Merge the per-record color tallies that share one key."""

    def reduce(self, ctx):
        """Add up every value for ``ctx.key`` and emit a summary record.

        The emitted key is the empty string; the emitted value is a dict
        with the key under ``'office'`` and the merged tally under
        ``'counts'``.
        """
        total = Counter()
        for partial in ctx.values:
            # Same ``+`` semantics as sum(ctx.values, Counter()).
            total = total + partial
        summary = {'office': ctx.key, 'counts': total}
        ctx.emit('', summary)
def __main__():
    """Build the task factory and hand it to the Pydoop pipes runner.

    NOTE(review): the name ``__main__`` is presumably the entry point the
    Pydoop launcher invokes -- confirm before renaming.
    """
    task_factory = pp.Factory(
        mapper_class=Mapper,
        reducer_class=Reducer,
    )
    pp.run_task(
        task_factory,
        private_encoding=True,
        context_class=AvroContext,
    )
1条答案
按热度按时间s2j5cfk01#
你可以使用 pydoop >= 1.0.0-rc2 来实现。下面是颜色计数(color count)示例的代码:
要运行应用程序:
有关详细信息,请参阅 Pydoop 的 Avro 文档。