// TFIDFSearch.java
//
// Paste-site metadata (not part of the program): avatar, unknown, java, a year ago, 2.1 kB, 3, Indexable
import java.io.*;
import java.util.*;

/**
 * Ranks documents against free-text queries using a pre-computed, serialized term index.
 *
 * <p>The index maps each term to its postings, i.e. a map from document id to the weight of that
 * term in that document. A document's score for a query is the sum of the weights of every query
 * term it appears under. The weights are presumably TF-IDF values produced by a separate indexer
 * (not visible in this file) -- this class only sums and ranks them.
 *
 * <p>Instances are not safe for concurrent use while {@link #loadIndex(String)} is running.
 */
public class TFIDFSearch {
    private static final String USAGE = "Usage: java TFIDFSearch <index-path> <topN> <queries...>";

    /** Highest score first; equal scores are ordered by ascending document id for stable output. */
    private static final Comparator<Map.Entry<Integer, Double>> BY_SCORE_DESC_THEN_DOC_ID =
        Map.Entry.<Integer, Double>comparingByValue(Comparator.reverseOrder())
            .thenComparing(Map.Entry.<Integer, Double>comparingByKey());

    /** term -> (document id -> weight); {@code null} until {@link #loadIndex(String)} succeeds. */
    private Map<String, Map<Integer, Double>> index;

    /**
     * Loads the index from a file written with Java object serialization.
     *
     * <p>SECURITY NOTE: native Java deserialization can execute attacker-controlled code. Only
     * point this at index files from a trusted source, or install an {@code ObjectInputFilter}.
     *
     * @param indexPath path of the serialized index file
     * @throws IOException if the file cannot be read, or does not contain a {@code Map}
     * @throws ClassNotFoundException if a class referenced by the serialized data is unavailable
     * @throws NullPointerException if {@code indexPath} is {@code null}
     */
    public void loadIndex(String indexPath) throws IOException, ClassNotFoundException {
        Objects.requireNonNull(indexPath, "indexPath");

        // try-with-resources closes the stream even when readObject() throws.
        try (ObjectInputStream in =
                new ObjectInputStream(new BufferedInputStream(new FileInputStream(indexPath)))) {
            Object loaded = in.readObject();
            if (!(loaded instanceof Map)) {
                String actual = (loaded == null) ? "null" : loaded.getClass().getName();
                throw new InvalidObjectException(
                    "Index file '" + indexPath + "' does not contain a Map (found " + actual + ")");
            }
            // Generic type arguments are erased at runtime, so only the raw Map type can be
            // verified here; the element types are trusted to match what the indexer wrote.
            @SuppressWarnings("unchecked")
            Map<String, Map<Integer, Double>> typed = (Map<String, Map<Integer, Double>>) loaded;
            index = typed;
        }
    }

    /**
     * Returns the ids of the best-scoring documents for a query.
     *
     * <p>The query is lower-cased (locale-independently) and split on every run of characters
     * other than {@code a-z}. Each resulting word contributes its posting weights to the matching
     * documents; a word repeated in the query contributes once per occurrence.
     *
     * @param query free-text query
     * @param topN maximum number of document ids to return; must not be negative
     * @return at most {@code topN} document ids, best score first (ties by ascending id), as an
     *     unmodifiable list; empty when no query word is in the index
     * @throws NullPointerException if {@code query} is {@code null}
     * @throws IllegalArgumentException if {@code topN} is negative
     * @throws IllegalStateException if no index has been loaded yet
     */
    public List<Integer> search(String query, int topN) {
        Objects.requireNonNull(query, "query");
        if (topN < 0) {
            throw new IllegalArgumentException("topN must not be negative: " + topN);
        }
        if (index == null) {
            throw new IllegalStateException("No index loaded; call loadIndex() before search()");
        }

        // Locale.ROOT keeps "I" -> "i" even under Turkish/Azeri default locales, where the
        // default-locale mapping yields a dotless i that would then be stripped as a non-letter.
        String[] words = query.toLowerCase(Locale.ROOT).split("[^a-z]+");

        Map<Integer, Double> docScores = new HashMap<>();
        for (String word : words) {
            // split() yields an empty first token when the query starts with a separator.
            if (word.isEmpty()) {
                continue;
            }
            Map<Integer, Double> postings = index.get(word);
            if (postings != null) {
                postings.forEach((docId, weight) -> docScores.merge(docId, weight, Double::sum));
            }
        }

        return docScores.entrySet().stream()
            .sorted(BY_SCORE_DESC_THEN_DOC_ID)
            .limit(topN)
            .map(Map.Entry::getKey)
            .toList();
    }

    /**
     * Command-line entry point: loads the index, then prints the top results for each query.
     *
     * <p>Exits with status 1 when {@code <topN>} is not a non-negative integer or the index
     * cannot be loaded.
     *
     * @param args {@code <index-path> <topN> <queries...>}
     */
    public static void main(String[] args) {
        if (args.length < 3) {
            System.err.println(USAGE);
            return;
        }

        String indexPath = args[0];
        int topN;
        try {
            topN = Integer.parseInt(args[1]);
        } catch (NumberFormatException e) {
            topN = -1; // routed to the shared error path below
        }
        if (topN < 0) {
            System.err.println("Invalid <topN> '" + args[1] + "': expected a non-negative integer");
            System.err.println(USAGE);
            System.exit(1);
            return;
        }
        String[] queries = Arrays.copyOfRange(args, 2, args.length);

        TFIDFSearch searcher = new TFIDFSearch();
        try {
            searcher.loadIndex(indexPath);
        } catch (IOException | ClassNotFoundException e) {
            System.err.println("Failed to load index from '" + indexPath + "': " + e);
            System.exit(1);
            return;
        }

        for (String query : queries) {
            List<Integer> results = searcher.search(query, topN);
            System.out.println("Query: " + query);
            System.out.println("Top " + topN + " results: " + results);
        }
    }
}
// Editor is loading...
// Leave a Comment