#include <algorithm>
#include <cmath>
#include <iostream>
#include <map>
#include <set>
#include <stdexcept>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace std;

const int MAX_RESULT_DOCUMENT_COUNT = 5;

string ReadLine() {
    string s;
    getline(cin, s);
    return s;
}

int ReadLineWithNumber() {
    int result;
    cin >> result;
    ReadLine();
    return result;
}

vector<string> SplitIntoWords(const string& text) {
    vector<string> words;
    string word;
    for (const char c : text) {
        if (c == ' ') {
            if (!word.empty()) {
                words.push_back(word);
                word.clear();
            }
        } else {
            word += c;
        }
    }
    if (!word.empty()) {
        words.push_back(word);
    }
    return words;
}

struct Document {
    Document() = default;

    Document(int id, double relevance, int rating)
        : id(id)
        , relevance(relevance)
        , rating(rating) {
    }

    int id = 0;
    double relevance = 0.0;
    int rating = 0;
};

template <typename StringContainer>
set<string> MakeUniqueNonEmptyStrings(const StringContainer& strings) {
    set<string> non_empty_strings;
    for (const string& str : strings) {
        if (!str.empty()) {
            non_empty_strings.insert(str);
        }
    }
    return non_empty_strings;
}

enum class DocumentStatus {
    ACTUAL,
    IRRELEVANT,
    BANNED,
    REMOVED,
};

class SearchServer {
public:
    template <typename StringContainer>
    explicit SearchServer(const StringContainer& stop_words)
        : stop_words_(MakeUniqueNonEmptyStrings(stop_words))  // Extract non-empty stop words
    {
        if (!all_of(stop_words_.begin(), stop_words_.end(), IsValidWord)) {
            throw invalid_argument("Some of stop words are invalid"s);
        }
    }

    explicit SearchServer(const string& stop_words_text)
        : SearchServer(SplitIntoWords(stop_words_text))  // Invoke delegating constructor from string container
    {
    }

    void AddDocument(int document_id, const string& document, DocumentStatus status,
                     const vector<int>& ratings) {
        if ((document_id < 0) || (documents_.count(document_id) > 0)) {
            throw invalid_argument("Invalid document_id"s);
        }
        const auto words = SplitIntoWordsNoStop(document);

        const double inv_word_count = 1.0 / words.size();
        for (const string& word : words) {
            word_to_document_freqs_[word][document_id] += inv_word_count;
        }
        documents_.emplace(document_id, DocumentData{ComputeAverageRating(ratings), status});
        document_ids_.push_back(document_id);
    }

    template <typename DocumentPredicate>
    vector<Document> FindTopDocuments(const string& raw_query,
                                      DocumentPredicate document_predicate) const {
        const auto query = ParseQuery(raw_query);

        auto matched_documents = FindAllDocuments(query, document_predicate);

        sort(matched_documents.begin(), matched_documents.end(),
             [](const Document& lhs, const Document& rhs) {
                 if (abs(lhs.relevance - rhs.relevance) < 1e-6) {
                     return lhs.rating > rhs.rating;
                 } else {
                     return lhs.relevance > rhs.relevance;
                 }
             });
        if (matched_documents.size() > static_cast<size_t>(MAX_RESULT_DOCUMENT_COUNT)) {
            matched_documents.resize(MAX_RESULT_DOCUMENT_COUNT);
        }

        return matched_documents;
    }

    vector<Document> FindTopDocuments(const string& raw_query, DocumentStatus status) const {
        return FindTopDocuments(
            raw_query, [status](int document_id, DocumentStatus document_status, int rating) {
                return document_status == status;
            });
    }

    vector<Document> FindTopDocuments(const string& raw_query) const {
        return FindTopDocuments(raw_query, DocumentStatus::ACTUAL);
    }

    int GetDocumentCount() const {
        return static_cast<int>(documents_.size());
    }

    int GetDocumentId(int index) const {
        return document_ids_.at(index);
    }

    tuple<vector<string>, DocumentStatus> MatchDocument(const string& raw_query,
                                                        int document_id) const {
        const auto query = ParseQuery(raw_query);

        vector<string> matched_words;
        for (const string& word : query.plus_words) {
            if (word_to_document_freqs_.count(word) == 0) {
                continue;
            }
            if (word_to_document_freqs_.at(word).count(document_id)) {
                matched_words.push_back(word);
            }
        }
        for (const string& word : query.minus_words) {
            if (word_to_document_freqs_.count(word) == 0) {
                continue;
            }
            if (word_to_document_freqs_.at(word).count(document_id)) {
                matched_words.clear();
                break;
            }
        }
        return {matched_words, documents_.at(document_id).status};
    }

private:
    struct DocumentData {
        int rating;
        DocumentStatus status;
    };
    const set<string> stop_words_;
    map<string, map<int, double>> word_to_document_freqs_;
    map<int, DocumentData> documents_;
    vector<int> document_ids_;

    bool IsStopWord(const string& word) const {
        return stop_words_.count(word) > 0;
    }

    static bool IsValidWord(const string& word) {
        // A valid word must not contain special characters
        return none_of(word.begin(), word.end(), [](char c) {
            return c >= '\0' && c < ' ';
        });
    }

    vector<string> SplitIntoWordsNoStop(const string& text) const {
        vector<string> words;
        for (const string& word : SplitIntoWords(text)) {
            if (!IsValidWord(word)) {
                throw invalid_argument("Word "s + word + " is invalid"s);
            }
            if (!IsStopWord(word)) {
                words.push_back(word);
            }
        }
        return words;
    }

    static int ComputeAverageRating(const vector<int>& ratings) {
        if (ratings.empty()) {
            return 0;
        }
        int rating_sum = 0;
        for (const int rating : ratings) {
            rating_sum += rating;
        }
        return rating_sum / static_cast<int>(ratings.size());
    }

    struct QueryWord {
        string data;
        bool is_minus;
        bool is_stop;
    };

    QueryWord ParseQueryWord(const string& text) const {
        if (text.empty()) {
            throw invalid_argument("Query word is empty"s);
        }
        string word = text;
        bool is_minus = false;
        if (word[0] == '-') {
            is_minus = true;
            word = word.substr(1);
        }
        if (word.empty() || word[0] == '-' || !IsValidWord(word)) {
            throw invalid_argument("Query word "s + text + " is invalid"s);
        }

        return {word, is_minus, IsStopWord(word)};
    }

    struct Query {
        set<string> plus_words;
        set<string> minus_words;
    };

    Query ParseQuery(const string& text) const {
        Query result;
        for (const string& word : SplitIntoWords(text)) {
            const auto query_word = ParseQueryWord(word);
            if (!query_word.is_stop) {
                if (query_word.is_minus) {
                    result.minus_words.insert(query_word.data);
                } else {
                    result.plus_words.insert(query_word.data);
                }
            }
        }
        return result;
    }

    // Existence required
    double ComputeWordInverseDocumentFreq(const string& word) const {
        return log(GetDocumentCount() * 1.0 / word_to_document_freqs_.at(word).size());
    }

    template <typename DocumentPredicate>
    vector<Document> FindAllDocuments(const Query& query,
                                      DocumentPredicate document_predicate) const {
        map<int, double> document_to_relevance;
        for (const string& word : query.plus_words) {
            if (word_to_document_freqs_.count(word) == 0) {
                continue;
            }
            const double inverse_document_freq = ComputeWordInverseDocumentFreq(word);
            for (const auto& [document_id, term_freq] : word_to_document_freqs_.at(word)) {
                const auto& document_data = documents_.at(document_id);
                if (document_predicate(document_id, document_data.status, document_data.rating)) {
                    document_to_relevance[document_id] += term_freq * inverse_document_freq;
                }
            }
        }

        for (const string& word : query.minus_words) {
            if (word_to_document_freqs_.count(word) == 0) {
                continue;
            }
            for (const auto& [document_id, _] : word_to_document_freqs_.at(word)) {
                document_to_relevance.erase(document_id);
            }
        }

        vector<Document> matched_documents;
        for (const auto& [document_id, relevance] : document_to_relevance) {
            matched_documents.push_back(
                {document_id, relevance, documents_.at(document_id).rating});
        }
        return matched_documents;
    }
};

template <typename Iterator>
class IteratorRange {
public:
    IteratorRange(Iterator begin, Iterator end)
        : begin_(begin)
        , end_(end)
        , size_(static_cast<int>(distance(begin, end))) {
    }

    auto GetBegin() const {
        return begin_;
    }

    auto GetEnd() const {
        return end_;
    }

    int GetSize() const {
        return size_;
    }

private:
    Iterator begin_;
    Iterator end_;
    int size_;
};

template <typename Iterator>
class Paginator {
public:
    Paginator(Iterator begin, Iterator end, int page_size) {
        // Fill all full pages
        for (auto i = distance(begin, end) / page_size; i != 0; --i) {
            Iterator previous_begin = begin;  // remember where the current page starts
            advance(begin, page_size);        // move to the start of the next page
            pages_.push_back(IteratorRange(previous_begin, begin));
        }
        // Whatever remains forms the last, possibly shorter, page
        if (distance(begin, end) > 0) {
            pages_.push_back(IteratorRange(begin, end));
        }
    }

    auto begin() const {
        return pages_.begin();
    }

    auto end() const {
        return pages_.end();
    }

    auto GetPages() const {
        return pages_;
    }

private:
    vector<IteratorRange<Iterator>> pages_;
};

template <typename Container>
auto Paginate(const Container& c, int page_size) {
    return Paginator(begin(c), end(c), page_size);
}

template <typename It>
void PrintRange(It range_begin, It range_end) {
    for (auto it = range_begin; it != range_end; ++it) {
        cout << *it << " "s;
    }
    cout << endl;
}

ostream& operator<<(ostream& output, const Document& document) {
    output << "{ document_id = "s << document.id
           << ", relevance = "s << document.relevance
           << ", rating = "s << document.rating
           << " }"s;
    return output;
}

template <typename It>
ostream& operator<<(ostream& output, const IteratorRange<It>& page) {
    for (auto it = page.GetBegin(); it != page.GetEnd(); ++it) {
        output << *it;
    }
    return output;
}

int main() {
    SearchServer search_server("and with"s);

    search_server.AddDocument(1, "funny pet and nasty rat"s, DocumentStatus::ACTUAL, {7, 2, 7});
    search_server.AddDocument(2, "funny pet with curly hair"s, DocumentStatus::ACTUAL, {1, 2, 3});
    search_server.AddDocument(3, "big cat nasty hair"s, DocumentStatus::ACTUAL, {1, 2, 8});
    search_server.AddDocument(4, "big dog cat Vladislav"s, DocumentStatus::ACTUAL, {1, 3, 2});
    search_server.AddDocument(5, "big dog hamster Borya"s, DocumentStatus::ACTUAL, {1, 1, 1});

    const auto search_results = search_server.FindTopDocuments("curly dog"s);
    int page_size = 2;
    const auto pages = Paginate(search_results, page_size);

    // Print the matched documents page by page
    for (auto page = pages.begin(); page != pages.end(); ++page) {
        cout << *page << endl;
        cout << "Page break"s << endl;
    }
}
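
A small usage sketch (not part of the original paste): the predicate overload of FindTopDocuments accepts an arbitrary lambda with the signature (document_id, status, rating), so results can be filtered by any criterion. The snippet below assumes it is appended inside main() above; the variable name even_id_results and the even-id filter are illustrative only.

    // Sketch: custom-predicate search, intended to be placed at the end of main() above.
    // The lambda matches the DocumentPredicate signature used by FindAllDocuments.
    const auto even_id_results = search_server.FindTopDocuments(
        "curly dog"s, [](int document_id, DocumentStatus status, int rating) {
            return document_id % 2 == 0;  // keep only documents with even ids
        });
    for (const Document& document : even_id_results) {
        cout << document << endl;  // uses the operator<< defined for Document above
    }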