AcceleratedC++/Chapter6	AcceleratedC++/Chapter8

Chapter 7 Using associative containers

기존에 이용한 vector, list는 모두 push_back, insert를 이용해서 요소를 넣어주었을 때, 요소가 순서대로 들어가는 순차 컨테이너이다. 이러한 순차컨테이너가 모든 프로그램의 자료구조의 대안이 되어 줄 수는 없다. (예를 들자면 WikiPedia:Binary_search_tree, WikiPedia:AVL_tree 와 같이 최적화된 알고리즘을 통해서 우리는 단순한 순차검색보다 더욱 빠른 수행 시간을 갖는 프로그램의 작성을 하고 싶을 수 있다.)

7.1 Containers that support efficient look-up

연관컨테이너(Associative Container)	요소들을 삽입한 순서대로 배열하지 않고, 요소의 값에 따라 삽입 순서를 자동적으로 조정한다. 따라서 검색알고리즘의 수행시 기존의 순차컨테이너 이상의 성능을 보장한다.
Key	요소의 검색을 위해서 사용되는 검색어. 한개의 요소를 다른 요소와 구분하는 역할을 한다. 키는 요소의 변경시에 변경되지 않는 값이다. 이에 반하여 vector의 인덱스는 요소의 제거와 추가시 인덱스가 변화한다. 참조)DB의 WikiPedia:Primary_key 를 알아보자.
<map>	C++에서 제공되는 연관 배열(Associative Array). 인덱스는 순서를 비교할 수 있는 것이면 무엇이든 가능하다. 단점으로는 자체적 순서를 갖기 때문에 순서를 변경하는 일반 알고리즘을 적용할 수 없다.

7.2 Counting words

//word_cout.cpp
#include <iostream>
#include <map>
#include <string>

using std::cin;
using std::cout;
using std::endl;
using std::map;
using std::string;

int main()
{
	string s;
	map<string, int> counters; // store each word and an associated counter

	// read the input, keeping track of each word and how often we see it
	while (cin >> s)
		++counters[s];

	// write the words and associated counts
	for (std::map<string, int>::const_iterator it = counters.begin();
	     it != counters.end(); ++it) {
		cout << it->first << "\t" << it->second << endl;
	}
	return 0;
}

상기에서 지정된 map<string, int>는 "string에서 int로의 map"라는 용어로 부름. string type key, int type value
최초로 등장한 string key에 대해서 map<k, v>은 새로운 요소를 생성. value-initialized.
map은 []연산자를 통해 키값을 통해서 접근이 가능하나, 이 경우 string type key이기 때문에 모든 배열의 요소를 돌기위해서 일반적인 방식을 선택하였다. map의 각각의 요소는 pair라는 자료의 타입으로 구성. map은 pair의 first 요소에는 key, second 요소에는 value를 담는다.

map<class T>::iterator	K	V
pair	const K	V

따라서 한번 성성된 Key는 변경이 불가하다.

Visual C++ 6.0 에서 소스를 컴파일 할때 책에 나온대로 (using namespace std를 사용하지 않고 위와 같이 사용하는 것들의 이름공간만 지정할 경우) map<string, int>::const_iterator 이렇게 치면 using std::map; 이렇게 미리 이름공간을 선언 했음에도 불구하고 에러가 뜬다. 6.0에서 제대로 인식을 못하는것 같다. 위와 같이 std::map<string, int>::const_iterator 이런식으로 이름 공간을 명시하거나 using namespace std; 라고 선언 하던지 해야 한다.
Visual C++에서 map을 사용할때 warning이 많이 뜬다면 [1] 이 페이지를 참고.

7.3 Generating a cross-reference table

//cross_reference_table.cpp
#include <map>
#include <iostream>
#include <string>
#include <vector>

#include "split.h"  // 6.1.1. 절에서 만들어 놓은 함수

using std::cin;            using std::cout;
using std::endl;           using std::getline;
using std::istream;        using std::string;
using std::vector;         using std::map;

// find all the lines that refer to each word in the input 
// 기본적으로 split 함수를 이용하여서 단어를 tokenize 한다. 만약 인자로 find_url을 주게되면 url이 나타난 위치를 기록한다.
map<string, vector<int> > xref(istream& in, vector<string> find_words(const string&) = split)
{
	string line;
	int line_number = 0;
	map<string, vector<int> > ret;

	// read the next line
	while (getline(in, line)) {
		++line_number;

		// break the input line into words
		vector<string> words = find_words(line);

		// remember that each word occurs on the current line
		for (vector<string>::const_iterator it = words.begin();
		     it != words.end(); ++it)
			ret[*it].push_back(line_number);   // ret[*it] == (it->second) = vector<int> 같은 표현이다.
                                                           // 따라서 백터에는 각 요소가 나타난 곳의 라인이 순서대로 저장된다.
	}
	return ret;
}

int main()
{
	// call `xref' using `split' by default
	map<string, vector<int> > ret = xref(cin);

	// write the results
	for (map<string, vector<int> >::const_iterator it = ret.begin();
	     it != ret.end(); ++it) {
		// write the word
		cout << it->first << " occurs on line(s): ";  // key 값인 string을 출력한다.

		// followed by one or more line numbers
		vector<int>::const_iterator line_it = it->second.begin();  // it->second == vector<int> 동일 표현
		cout << *line_it;	// write the first line number

		++line_it;
		// write the rest of the line numbers, if any
                // second 값인 vector<int> 를 이용하여서 string이 나타난 각 줄의 번호를 출력한다.
		while (line_it != it->second.end()) {
			cout << ", " << *line_it;
			++line_it;
		}
		// write a new line to separate each word from the next
		cout << endl;
	}

	return 0;
}

map<string, vector<int> > xref(istream& in, vector<string> find_words(const string&) = split)

 주의) STL을 이용하면서 많이 범하는 실수: > > (0) >>(X) 컴파일러는 >>에 대해서 operator>>()를 기대한다.
 기본인자(default argument)설정. 함수의 선언식에 매개변수로 = 를 할당하면 기본 인자로 등록된다.

int main()

 기본 변수 split 을 이용해서 입력받은 값을 이용해서 ret를 초기화한다.

map<string, vector<int> >::const_iterator	K	V
pair	first = string	second = vector<int>

7.4 Generating sentences

문법과 주어진 단어를 이용하여서 간단한 문장조합 프로그램을 만들어 본다. 제시된 규칙은 다음과 같다.

	cat
	dog
	table


	large
	brown
	absurd
	sits
	jumps
	on the stairs
	under the sky
	wherever it wants
	the <noun-phrase> <verb> <location>

상기의 규칙을 통해서 우리는 간단하게 
~cpp the table[noun-phrase, noun] jumps[verb] wherever it wants[location]

같은 프로그램을 만들어 볼 수 있다.

참고) Google Talks KLDP google talks perl clone

=== 7.4.1 규칙 표현하기 ===
상기에서는 map<string, vector<string> >의 형태로 구현해야한다. 그러나 <adjective>, <location>, <noun>과 같이 동일한 키 값에 대해서 규칙이 여러개가 존재하는 경우를 다루기 위해서 map <string, vector< vector<string> > > 의 타입을 이용한다.
  // typedef 를 이용해서 좀더 읽기에 수월하도록 형을 지정하는 것이 가능하다.
typedef vector<string> Rule;
typedef vector<Rule> Rule_collection;
typedef map<string, Rule_collection> Grammar;

=== 7.4.2 문법 읽어들이기 ===
Grammar read_grammar(istream& in) {
	Grammar ret;
	string line;
	while (getline(in, line)) {
		vector<string> entry = split(line); // split 함수를 이용해서 입력된 문자열을 ' '를 기존으로 tokenize 한다.
		if (!entry.empty())
			ret[entry[0]].push_back(
				Rule(entry.begin() + 1, entry.end()));
			//vector<string>의 첫번째 요소를 entry의 키값으로 이용한다. 2번째 요소부터 마지막 요소까지는 entry의 값으로 저장한다.
	}
	return ret;
}

=== 7.4.3 문장 생성하기 ===
vector<string> gen_sentence(const Grammar& g) {
	vector<string> ret;
	gen_aux(g, "<sentence>", ret);
	return ret;
}
 
g:Grammar map에서 <sentence> 키값으로 실제의 문장에 관한 문법을 읽어들인다.

bool bracketed(const string& s) {
	return s.size() > 1 && s[0] == '<' && s[s.size()-1] == '>';
}

//Recursive function call 재귀 함수의 이용
void gen_aux(const Grammar& g, const string& word, vector<string>& ret) {
	if (!bracketed(word)) {
		ret.push_back(word);		// 실제로 ret:vector<string> 에 값이 저장되고 재귀 함수가 끝이 나는 조건식이 된다.
	} else {
		Grammar::const_iterator it = g.find(word);		// g[word]로 참조를 할경우 map 컨테이너의 특성상 새로운 map이 생성된다.
		if (it == g.end())
			throw logic_error("empty rule");                // 규직이 존재하지 않는 다면 word로 전달된 규칙이 존재하지 않는 다는 말이된다.
                                                                        // 즉 입력이 잘못된 경우가 된다.

		const Rule_collection& c = it->second;                 

		const Rule& r = c[nrand(c.size())];

		// <noun-phrase> 와 같이 연결된 문법이 <noun> 인경우에는 재귀적 함수 호출을 통해서 마지막 단어까지 내려간다.
		// 마지막 단어까지 내려갓을 경우 재귀 함수를 호출하고 bracketed = false 의 조건이 되기 때문에 재귀 함수가 종료된다.
		for(Rule::const_iterator i = r.begin(); i != r.end(); ++i) 
			gen_aux(g, *i, ret);
	}
}
 
map<class T>.find(K) : 주어진 키를 갖는 요소를 찾고, 있다면 그 요소를 가리키는 반복자를 리턴한다. 없다면 map<class T>.end()를 리턴한다.
nrand(c.size()) : 7.4.4 절에서 설명함. pseudo rand 함수를 통해서 [0, c.size()) 의 범위를 갖는 가상적 임의의 수를 리턴한다.
재귀함수의 호출은 반드시 함수의 내부에 재귀호출을 탈출 할 수 잇는 코드가 존재해야한다. 그렇지 않을 경우 stack_overflow가 발생한다.

int main()
{
	// generate the sentence
	vector<string> sentence = gen_sentence(read_grammar(cin));

	// write the first word, if any
	vector<string>::const_iterator it = sentence.begin();
	if (!sentence.empty()) {
		cout << *it;
		++it;
	}

	// write the rest of the words, each preceded by a space
	while (it != sentence.end()) {
		cout << " " << *it;
		++it;
	}

	cout << endl;
	return 0;
}

=== 7.4.4 무작위 요소를 선택하기 ===
int nrand(int n) {
	if (n <= 0 || n > RAND_MAX)
		throw domain_error("Argument to nrand is out of range");

	const int bucket_size = RAND_MAX / n;
	int r;

	do r = rand() / bucket_size;
	while (r >= n)
	return r;
}
 
rand() 는 C Standard Library <cstdlib> 라이브러리에 존재한다. 일반적으로 cstdlib 에 정의된 RAND_MAX보다 작은 값을 임의적으로 리턴한다.
RAND_MAX % n를 이용해서 임의의 수를 구할 경우 Pseudo 임의 값의 한계로 인해서 문제점이 발생한다.

- n 이 작은 경우: rand()함수는 홀수 짝수를 번갈아 출력하는 성질이 있기 때문에 n이 2일경우 0과 1이 반복적으로 출력된다.
- n 이 너무 큰 경우에도 특정한 수가 반복적으로 나타나게 된다.

따라서 우리는 RAND_MAX를 n으로 나누어서 전체 RAND_MAX를 bucket이라는 단위로 나눈뒤에 이 bucket으로 rand()가 발생시키는 난수를 나누어 줌으로써 우리가 원하는 [0, n) 의 임의의 수를 얻을 수 있다.

=== Addition. Entire Source Filie ===
//grammar.cpp
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

#include "split.h"
#include <time.h>

using std::istream;           using std::cin;
using std::copy;              using std::cout;
using std::endl;              using std::find;
using std::getline;           using std::logic_error;
using std::map;               using std::string;
using std::vector;            using std::domain_error;
using std::rand;

typedef vector<string> Rule;
typedef vector<Rule> Rule_collection;
typedef map<string, Rule_collection> Grammar;

// read a grammar from a given input stream
Grammar read_grammar(istream& in)
{
	Grammar ret;
	string line;

	// read the input
	while (getline(in, line)) {

		// `split' the input into words
		vector<string> entry = split(line);

		if (!entry.empty())
			// use the category to store the associated rule
			ret[entry[0]].push_back(
				Rule(entry.begin() + 1, entry.end()));
	}
	return ret;
}

void gen_aux(const Grammar&, const string&, vector<string>&);

int nrand(int);

vector<string> gen_sentence(const Grammar& g)
{
	vector<string> ret;
	gen_aux(g, "<sentence>", ret);
	return ret;
}

bool bracketed(const string& s)
{
	return s.size() > 1 && s[0] == '<' && s[s.size() - 1] == '>';
}

void
gen_aux(const Grammar& g, const string& word, vector<string>& ret)
{

	if (!bracketed(word)) {
		ret.push_back(word);
	} else {
		// locate the rule that corresponds to `word'
		Grammar::const_iterator it = g.find(word);
		if (it == g.end())
			throw logic_error("empty rule");

		// fetch the set of possible rules
		const Rule_collection& c = it->second;

		// from which we select one at random
		const Rule& r = c[nrand(c.size())];

		// recursively expand the selected rule
		for (Rule::const_iterator i = r.begin(); i != r.end(); ++i)
			gen_aux(g, *i, ret);
	}
}

int main()
{
	// generate the sentence
	vector<string> sentence = gen_sentence(read_grammar(cin));

	// write the first word, if any
	vector<string>::const_iterator it = sentence.begin();
	if (!sentence.empty()) {
		cout << *it;
		++it;
	}

	// write the rest of the words, each preceded by a space
	while (it != sentence.end()) {
		cout << " " << *it;
		++it;
	}

	cout << endl;
	return 0;
}

// return a random integer in the range `[0,' `n)'
int nrand(int n)
{
	if (n <= 0 || n > RAND_MAX)
		throw domain_error("Argument to nrand is out of range");

	const int bucket_size = RAND_MAX / n;
	int r;

	do r = rand() / bucket_size;
	while (r >= n);

	return r;
}

7.5 A note on performance

Hash Table	각 키값에 대하여 해쉬 값을 제공해야함. 성능이 해쉬 함수에 크게 영향을 받음. 요소들을 원하는 순서대로 얻기 쉽지 않음
Associative Container in STL	키 값이 <. == 연산으로 비교만 된다면 무엇이든 무관, 요소로의 접근 시간이 logn으로 커짐. 항상 정렬 상태를 유지

STL의 Associative Container는 balanced self-adjusting tree(참고 WikiPedia:AVL_tree )구조를 이용하여서 연관 컨테이너를 구현했음.

AcceleratedC++