Untitled

 avatar
unknown
plain_text
a month ago
631 B
2
Indexable
import scrapy
from scrapy.loader.processors import TakeFirst, MapCompose, Join


def tirar_espaco_em_branco(valor):
    """Return ``valor`` without its leading and trailing whitespace."""
    sem_espacos = valor.strip()
    return sem_espacos

def processar_caractere_especiais(valor):
    """Normalize typographic characters in ``valor``.

    Curly double quotes (U+201C and U+201D) are removed and every em dash
    (U+2014) is replaced by a plain ASCII hyphen.

    Args:
        valor: The text to clean.

    Returns:
        A new string with the replacements applied.
    """
    sem_aspas = valor.replace(u"\u201c", '').replace(u"\u201d", '')
    # The dash must be written as a \u escape: the previous "\2014" literal was
    # parsed as the octal escape \201 followed by the digit 4, so real em
    # dashes were never matched.
    return sem_aspas.replace(u"\u2014", '-')


class CitacaoItem(scrapy.Item):
    """Item holding a quote's text (``frase``), author (``autor``) and tags.

    Each field declares the processors to run on its values; these are
    presumably picked up by an ItemLoader elsewhere in the project
    (not visible in this file).
    """

    # Every collected value goes through whitespace stripping and then
    # special-character cleanup; TakeFirst keeps a single resulting value.
    frase = scrapy.Field(
        input_processor=MapCompose(
            tirar_espaco_em_branco,
            processar_caractere_especiais,
        ),
        output_processor=TakeFirst(),
    )

    # No input processing; TakeFirst keeps a single collected value.
    autor = scrapy.Field(output_processor=TakeFirst())

    # Collected tags are merged into one comma-separated string.
    tags = scrapy.Field(output_processor=Join(','))
Leave a Comment