C++ 编程题:编写一个建立文档单词索引的应用程序

编写一个建立文档单词索引的应用程序。程序要求输出给定文档中的所有单词及其在文档中出现的次数,单词按照字母顺序排列。设文档为文本文档(当然,假设构成文档的所有单词都是合法单词),程序的输出结果保存为文本文档。

这是 C++ 的参考答案,供你参考(注意:该程序为每个单词输出的是它出现的行号,而不是出现次数):
#include <iostream>
#include <fstream>
#include <set>
#include <map>
#include <string>
#include <cctype>//isalpha, isupper,tolower
#include <cstdlib>//exit

using namespace std;

// Words that are too common to be worth indexing; they never appear in the output.
static const char *const kCommonWords[] = {
    "a", "an", "and", "are", "in", "is", "of",
    "or", "that", "the", "this", "to", "have"
};
static const std::size_t kCommonWordCount = sizeof(kCommonWords) / sizeof(kCommonWords[0]);

// Maps each indexed (lower-cased) word to the ordered set of line numbers it appears on.
// std::map keeps the words sorted, std::set keeps the line numbers sorted and unique.
typedef std::map<std::string, std::set<int> > WordIndex;

// Adds `lineNumber` to the entry for `word`, unless the word is empty or is in `ignored`.
static void recordWord(const std::string &word, int lineNumber,
                       const std::set<std::string> &ignored, WordIndex &index)
{
    if (!word.empty() && ignored.count(word) == 0)
    {
        index[word].insert(lineNumber);
    }
}

// Reads `in` character by character and fills `index`.
// A word is a maximal run of alphabetic characters, stored lower-cased.
// Line numbers start at 1 and advance on every '\n'.
static void buildIndex(std::istream &in, const std::set<std::string> &ignored, WordIndex &index)
{
    int lineNumber = 1;
    std::string word;
    char c;
    while (in.get(c))
    {
        // The <cctype> functions require a value representable as unsigned char;
        // passing a negative char (any non-ASCII byte) is undefined behaviour.
        const unsigned char uc = static_cast<unsigned char>(c);
        if (std::isalpha(uc))
        {
            word += static_cast<char>(std::tolower(uc));
            continue;
        }

        // Any non-letter terminates the current word (if there is one).
        recordWord(word, lineNumber, ignored, index);
        word.clear();
        if (c == '\n')
        {
            ++lineNumber;
        }
    }
    // The input may end without a trailing delimiter; flush the last word.
    recordWord(word, lineNumber, ignored, index);
}

// Writes one line per word: the word left-aligned and padded with '-' to 34 columns,
// a space, then every line number followed by a space.
// The stream's fill character and format flags are restored before returning.
static void writeIndex(std::ostream &out, const WordIndex &index)
{
    const std::ios_base::fmtflags savedFlags = out.flags();
    const char savedFill = out.fill();

    out.setf(std::ios_base::left, std::ios_base::adjustfield);
    out.fill('-');
    for (WordIndex::const_iterator entry = index.begin(); entry != index.end(); ++entry)
    {
        out.width(34); // width is reset after each insertion, so set it per word
        out << entry->first << " ";
        for (std::set<int>::const_iterator line = entry->second.begin();
             line != entry->second.end(); ++line)
        {
            out << *line << ' ';
        }
        out << '\n';
    }

    out.fill(savedFill);
    out.flags(savedFlags);
}

// Builds a word index of a text file.
//
// Usage: <program> infile outfile
//
// Every word of `infile` except the common words listed in kCommonWords is written to
// `outfile` in alphabetical order, followed by the numbers of the lines on which it
// occurs (line numbers, not an occurrence count). The same listing is echoed to
// standard output.
//
// Returns EXIT_SUCCESS on success, EXIT_FAILURE on a usage error or when a file cannot
// be opened or written.
int main(int argc, char *argv[])
{
    if (argc < 3)
    {
        std::cout << "usage: " << argv[0] << " infile outfile" << std::endl;
        std::cout << "generate a word list from an English file, "
                  << "each word is followed by the numbers of the lines where it occurred "
                  << "and then output the result to a file." << std::endl;
        return EXIT_FAILURE;
    }

    // A failed open sets failbit (not badbit), so test the stream itself.
    std::ifstream infile(argv[1]);
    if (!infile)
    {
        std::cerr << "open " << argv[1] << " error" << std::endl;
        return EXIT_FAILURE;
    }

    std::ofstream outfile(argv[2]);
    if (!outfile)
    {
        std::cerr << "open " << argv[2] << " error" << std::endl;
        return EXIT_FAILURE;
    }

    const std::set<std::string> ignored(kCommonWords, kCommonWords + kCommonWordCount);
    WordIndex index;
    buildIndex(infile, ignored, index);
    infile.close();

    writeIndex(outfile, index);
    outfile.close(); // flush to disk and surface any write error before reporting success
    if (!outfile)
    {
        std::cerr << "write " << argv[2] << " error" << std::endl;
        return EXIT_FAILURE;
    }

    // Echo the same listing to the console instead of re-reading the output file.
    writeIndex(std::cout, index);

    return EXIT_SUCCESS;
}
一个简单但效率低一些的做法是:
先根据空白字符切分并记录所有单词;
再删除重复出现的单词。
温馨提示:答案为网友推荐,仅供参考