Untitled
import scrapy
from scrapy.loader.processors import TakeFirst, MapCompose, Join


def tirar_espaco_em_branco(valor):
    """Return *valor* with leading and trailing whitespace removed."""
    return valor.strip()


def processar_caractere_especiais(valor):
    """Normalize typographic punctuation in *valor*.

    The curly double quotes (U+201C and U+201D) are removed and the
    em dash (U+2014) is replaced with a plain ASCII hyphen.
    """
    sem_aspas = valor.replace(u"\u201c", '').replace(u"\u201d", '')
    # The em dash needs the \u escape: "\2014" is parsed as the octal
    # escape \201 followed by the character "4", which never matches U+2014.
    return sem_aspas.replace(u"\u2014", '-')


class CitacaoItem(scrapy.Item):
    """Scraped quotation with its text, author and tags."""

    # Quote text: each input value is stripped and has its typographic
    # punctuation normalized; TakeFirst is the output processor.
    frase = scrapy.Field(
        input_processor=MapCompose(
            tirar_espaco_em_branco,
            processar_caractere_especiais,
        ),
        output_processor=TakeFirst(),
    )

    # Author: no input processing; TakeFirst is the output processor.
    autor = scrapy.Field(
        output_processor=TakeFirst(),
    )

    # Tags: collected values are joined with "," by the output processor.
    tags = scrapy.Field(
        output_processor=Join(','),
    )
Leave a Comment