<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://mediawiki.zeropage.org/api.php?action=feedcontributions&amp;feedformat=atom&amp;user=114.108.98.54</id>
	<title>ZeroWiki - User contributions [en]</title>
	<link rel="self" type="application/atom+xml" href="https://mediawiki.zeropage.org/api.php?action=feedcontributions&amp;feedformat=atom&amp;user=114.108.98.54"/>
	<link rel="alternate" type="text/html" href="https://mediawiki.zeropage.org/index.php/Special:Contributions/114.108.98.54"/>
	<updated>2026-05-15T09:59:31Z</updated>
	<subtitle>User contributions</subtitle>
	<generator>MediaWiki 1.39.8</generator>
	<entry>
		<id>https://mediawiki.zeropage.org/index.php?title=%EB%A8%B8%EC%8B%A0%EB%9F%AC%EB%8B%9D%EC%8A%A4%ED%84%B0%EB%94%94/2016/2016_07_09&amp;diff=50314</id>
		<title>머신러닝스터디/2016/2016 07 09</title>
		<link rel="alternate" type="text/html" href="https://mediawiki.zeropage.org/index.php?title=%EB%A8%B8%EC%8B%A0%EB%9F%AC%EB%8B%9D%EC%8A%A4%ED%84%B0%EB%94%94/2016/2016_07_09&amp;diff=50314"/>
		<updated>2016-07-13T05:23:46Z</updated>

		<summary type="html">&lt;p&gt;114.108.98.54: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;&amp;amp;#91;&amp;amp;#91;pagelist(^(머신러닝스터디/2016))&amp;amp;#93;&amp;amp;#93;&lt;br /&gt;
== Contents ==&lt;br /&gt;
* Embedding requires word indices as input.&lt;br /&gt;
** Initially we fed the word-frequency output of the Tokenizer as input, but the model trained poorly (see the sketch after this snippet).&lt;br /&gt;
** [http://keras.io/layers/embeddings/]&lt;br /&gt;
 tokenizer = Tokenizer(nb_words=1000)&lt;br /&gt;
 X_train = tokenizer.sequences_to_matrix(X_train, mode=&amp;quot;freq&amp;quot;)&lt;br /&gt;
&lt;br /&gt;
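A minimal sketch of the two representations, using made-up sample texts (the strings and variable names here are hypothetical, not from the study): texts_to_sequences yields the ordered word-index sequences an Embedding layer expects, while sequences_to_matrix(mode=&amp;quot;freq&amp;quot;) collapses each text into a frequency vector that discards word order.&lt;br /&gt;
 from keras.preprocessing.text import Tokenizer&lt;br /&gt;
 &lt;br /&gt;
 texts = [&amp;quot;the movie was great&amp;quot;, &amp;quot;the movie was terrible&amp;quot;]  # hypothetical samples&lt;br /&gt;
 tokenizer = Tokenizer(nb_words=1000)&lt;br /&gt;
 tokenizer.fit_on_texts(texts)&lt;br /&gt;
 &lt;br /&gt;
 # ordered word-index sequences, directly usable by an Embedding layer&lt;br /&gt;
 seqs = tokenizer.texts_to_sequences(texts)&lt;br /&gt;
 # frequency matrix: one row per text, word order discarded&lt;br /&gt;
 freqs = tokenizer.sequences_to_matrix(seqs, mode=&amp;quot;freq&amp;quot;)&lt;br /&gt;
&lt;br /&gt;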
* optimizer&lt;br /&gt;
** With adamax, accuracy stayed stuck in the 50% range.&lt;br /&gt;
** TensorFlow does not provide adamax; Keras ships its own implementation ([https://github.com/fchollet/keras/blob/c30432a665e23d598d2cec11094b0cc3cac2f46d/keras/optimizers.py#L356 code]). See the sketch below.&lt;br /&gt;
&lt;br /&gt;
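A hedged sketch of swapping optimizers (reusing the model built in the Code section below; the learning rates shown are the Keras defaults): passing an optimizer instance instead of a string exposes the learning rate, one knob worth trying when accuracy stalls around 50%.&lt;br /&gt;
 from keras.optimizers import Adamax, Adagrad&lt;br /&gt;
 &lt;br /&gt;
 # adamax, with the default learning rate of Keras&#039; own implementation&lt;br /&gt;
 model.compile(loss=&amp;quot;binary_crossentropy&amp;quot;, optimizer=Adamax(lr=0.002), metrics=[&amp;quot;accuracy&amp;quot;])&lt;br /&gt;
 # adagrad, the optimizer the final code below actually uses&lt;br /&gt;
 model.compile(loss=&amp;quot;binary_crossentropy&amp;quot;, optimizer=Adagrad(lr=0.01), metrics=[&amp;quot;accuracy&amp;quot;])&lt;br /&gt;
&lt;br /&gt;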
* Choosing an appropriate batch size&lt;br /&gt;
** If the batch size is too small (e.g. 32), training takes a long time.&lt;br /&gt;
** If it is too large, on the other hand, memory usage balloons. See the sketch below.&lt;br /&gt;
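A rough sketch of the trade-off, again assuming the model and data from the Code section below (the batch sizes are illustrative):&lt;br /&gt;
 # small batches: more weight updates per epoch, slower wall clock, less memory&lt;br /&gt;
 model.fit(X_train, y_train, batch_size=32, nb_epoch=1)&lt;br /&gt;
 # large batches: fewer updates per epoch, faster per epoch, more memory per step&lt;br /&gt;
 model.fit(X_train, y_train, batch_size=500, nb_epoch=1)&lt;br /&gt;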
=== Code ===&lt;br /&gt;
 import keras&lt;br /&gt;
 import numpy as np&lt;br /&gt;
 from keras.datasets import imdb&lt;br /&gt;
 from keras.preprocessing.text import Tokenizer&lt;br /&gt;
 from keras.models import Sequential&lt;br /&gt;
 from keras.layers import Dense, Dropout, Embedding, LSTM&lt;br /&gt;
 &lt;br /&gt;
 # Cap the vocabulary at 1000: rarer words are replaced by the OOV index&lt;br /&gt;
 (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=1000)&lt;br /&gt;
 &lt;br /&gt;
 # Zero-pad (and truncate) every review to a fixed length of 1000&lt;br /&gt;
 from keras.preprocessing.sequence import pad_sequences&lt;br /&gt;
 X_train = pad_sequences(X_train, 1000)&lt;br /&gt;
 X_test = pad_sequences(X_test, 1000)&lt;br /&gt;
 &lt;br /&gt;
 # Embedding into 64 dimensions, an LSTM, then a small dense head for binary classification&lt;br /&gt;
 model = Sequential()&lt;br /&gt;
 model.add(Embedding(1000, 64, input_length=1000))&lt;br /&gt;
 model.add(LSTM(output_dim=32, activation=&#039;sigmoid&#039;, inner_activation=&#039;hard_sigmoid&#039;))&lt;br /&gt;
 model.add(Dense(16, activation=&amp;quot;relu&amp;quot;))&lt;br /&gt;
 model.add(Dropout(0.5))&lt;br /&gt;
 model.add(Dense(8, activation=&amp;quot;relu&amp;quot;))&lt;br /&gt;
 model.add(Dropout(0.5))&lt;br /&gt;
 model.add(Dense(1, activation=&amp;quot;sigmoid&amp;quot;))&lt;br /&gt;
 &lt;br /&gt;
 model.compile(loss=&amp;quot;binary_crossentropy&amp;quot;, optimizer=&amp;quot;adagrad&amp;quot;, metrics=[&amp;quot;accuracy&amp;quot;])&lt;br /&gt;
 &lt;br /&gt;
 model.fit(X_train, y_train, batch_size=500, nb_epoch=100)  # the logs below are from a 10-epoch run&lt;br /&gt;
 model.evaluate(X_test, y_test, batch_size=1000)&lt;br /&gt;
 pred = model.predict(X_test, batch_size=20000)&lt;br /&gt;
 &lt;br /&gt;
 print (pred[0], y_test[0])&lt;br /&gt;
 print (pred[1], y_test[1])&lt;br /&gt;
 print (pred[2], y_test[2])&lt;br /&gt;
&lt;br /&gt;
=== Padding ===&lt;br /&gt;
pad_sequences equalizes arrays of different lengths by filling them with a given value.&lt;br /&gt;
 X_train = pad_sequences(X_train, 1000)&lt;br /&gt;
The code above zero-pads every input array in X_train that is shorter than 1000.&lt;br /&gt;
It also truncates arrays longer than 1000 down to that length (from the front, since truncating defaults to &#039;pre&#039;).&lt;br /&gt;
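A quick check with made-up sequences (the values are hypothetical):&lt;br /&gt;
 from keras.preprocessing.sequence import pad_sequences&lt;br /&gt;
 &lt;br /&gt;
 seqs = [[1, 2, 3], [4, 5, 6, 7, 8]]&lt;br /&gt;
 print(pad_sequences(seqs, maxlen=4))&lt;br /&gt;
 # [[0 1 2 3]   shorter sequence, zero-padded at the front&lt;br /&gt;
 #  [5 6 7 8]]  longer sequence, truncated from the front&lt;br /&gt;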
What pad_sequences does not do is bound the word index values themselves, so the vocabulary must be capped separately when loading the data:&lt;br /&gt;
 (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=1000)&lt;br /&gt;
With nb_words=1000 every word index stays below 1000 (words outside the top 1000 are mapped to the out-of-vocabulary index, 2 by default, which is why 2 appears so often in the example below), matching the Embedding layer&#039;s input_dim.&lt;br /&gt;
&lt;br /&gt;
Example output:&lt;br /&gt;
 array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,&lt;br /&gt;
         ... (rows of padding zeros elided) ...&lt;br /&gt;
           0,   0,   1,  20,  28, 716,  48, 495,  79,  27, 493,   8,   2,&lt;br /&gt;
           7,  50,   5,   2,   2,  10,   5, 852, 157,  11,   5,   2,   2,&lt;br /&gt;
          10,   5, 500,   2,   6,  33, 256,  41,   2,   7,  17,  23,  48,&lt;br /&gt;
           2,   2,  26, 269, 929,  18,   2,   7,   2,   2,   8, 105,   5,&lt;br /&gt;
           2, 182, 314,  38,  98, 103,   7,  36,   2, 246, 360,   7,  19,&lt;br /&gt;
         396,  17,  26, 269, 929,  18,   2, 493,   6, 116,   7, 105,   5,&lt;br /&gt;
         575, 182,  27,   5,   2,   2, 130,  62,  17,  24,  89,  17,  13,&lt;br /&gt;
         381,   2,   8,   2,   7,   5,   2,  38, 325,   7,  17,  23,  93,&lt;br /&gt;
           9, 156, 252,  19, 235,  20,  28,   5, 104,  76,   7,  17, 169,&lt;br /&gt;
          35,   2,  17,  23,   2,   7,  36,   2, 934,  56,   2,   6,  17,&lt;br /&gt;
         891, 214,  11,   5,   2,   6,  92,   6,  33, 256,  82,   7], dtype=int32)&lt;br /&gt;
&lt;br /&gt;
If nb_words is not used to cap the maximum word index (note: it caps the vocabulary size, not the array length), the Embedding step fails with an out-of-bounds index error:&lt;br /&gt;
 IndexError: index 4414 is out of bounds for size 1000&lt;br /&gt;
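A minimal sketch reproducing the error (the indices here are hypothetical; the behaviour matches the Theano backend used in this study):&lt;br /&gt;
 import numpy as np&lt;br /&gt;
 from keras.models import Sequential&lt;br /&gt;
 from keras.layers import Embedding&lt;br /&gt;
 &lt;br /&gt;
 model = Sequential()&lt;br /&gt;
 model.add(Embedding(1000, 64, input_length=4))  # input_dim=1000: valid indices are 0..999&lt;br /&gt;
 model.compile(loss=&amp;quot;binary_crossentropy&amp;quot;, optimizer=&amp;quot;sgd&amp;quot;)&lt;br /&gt;
 &lt;br /&gt;
 X = np.array([[1, 20, 999, 4414]])  # 4414 falls outside the embedding table&lt;br /&gt;
 model.predict(X)  # raises an IndexError like the one above&lt;br /&gt;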
&lt;br /&gt;
=== Training failure ===&lt;br /&gt;
A binary cross-entropy loss stuck at about 0.6932 is ln 2, the loss of a model that outputs 0.5 for every sample; together with accuracy pinned near 50%, it means the network learned nothing beyond chance.&lt;br /&gt;
 Using Theano backend.&lt;br /&gt;
 Epoch 1/10&lt;br /&gt;
 22500/22500 [==============================] - 115s - loss: 0.6932 - acc: 0.5014     &lt;br /&gt;
 Epoch 2/10&lt;br /&gt;
 22500/22500 [==============================] - 115s - loss: 0.6932 - acc: 0.5010     &lt;br /&gt;
 Epoch 3/10&lt;br /&gt;
 22500/22500 [==============================] - 114s - loss: 0.6932 - acc: 0.5014     &lt;br /&gt;
 Epoch 4/10&lt;br /&gt;
 22500/22500 [==============================] - 115s - loss: 0.6931 - acc: 0.5014     &lt;br /&gt;
 Epoch 5/10&lt;br /&gt;
 22500/22500 [==============================] - 115s - loss: 0.6931 - acc: 0.5014     &lt;br /&gt;
 Epoch 6/10&lt;br /&gt;
 22500/22500 [==============================] - 115s - loss: 0.6932 - acc: 0.5014     &lt;br /&gt;
 Epoch 7/10&lt;br /&gt;
 22500/22500 [==============================] - 114s - loss: 0.6931 - acc: 0.5014     &lt;br /&gt;
 Epoch 8/10&lt;br /&gt;
 22500/22500 [==============================] - 114s - loss: 0.6932 - acc: 0.5016     &lt;br /&gt;
 Epoch 9/10&lt;br /&gt;
 22500/22500 [==============================] - 115s - loss: 0.6932 - acc: 0.5014     &lt;br /&gt;
 Epoch 10/10&lt;br /&gt;
 22500/22500 [==============================] - 115s - loss: 0.6932 - acc: 0.5014   &lt;br /&gt;
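A one-line sanity check of that chance-level baseline:&lt;br /&gt;
 import numpy as np&lt;br /&gt;
 print(-np.log(0.5))  # 0.6931..., the plateau value of the loss above&lt;br /&gt;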
&lt;br /&gt;
=== Training success ===&lt;br /&gt;
 Epoch 1/10&lt;br /&gt;
 22500/22500 [==============================] - 14282s - loss: 0.6927 - acc: 0.5164     &lt;br /&gt;
 Epoch 2/10&lt;br /&gt;
 22500/22500 [==============================] - 10235s - loss: 0.6864 - acc: 0.5618     &lt;br /&gt;
 Epoch 3/10&lt;br /&gt;
 22500/22500 [==============================] - 3236s - loss: 0.6541 - acc: 0.6508     &lt;br /&gt;
 Epoch 4/10&lt;br /&gt;
 22500/22500 [==============================] - 3230s - loss: 0.5829 - acc: 0.7528     &lt;br /&gt;
 Epoch 5/10&lt;br /&gt;
 22500/22500 [==============================] - 3222s - loss: 0.5490 - acc: 0.7745     &lt;br /&gt;
 Epoch 6/10&lt;br /&gt;
 22500/22500 [==============================] - 3229s - loss: 0.5250 - acc: 0.7946     &lt;br /&gt;
 Epoch 7/10&lt;br /&gt;
 22500/22500 [==============================] - 3230s - loss: 0.5052 - acc: 0.8030     &lt;br /&gt;
 Epoch 8/10&lt;br /&gt;
 22300/22500 [============================&amp;amp;gt;.] - ETA: 28s - loss: 0.4963 - acc: 0.8046&lt;br /&gt;
(In fact, the next day the process turned out to have died, so there are no results after Epoch 8/10...)&lt;br /&gt;
== Next time ==&lt;br /&gt;
* Watch the Coursera videos for week 7&lt;br /&gt;
== See also ==&lt;br /&gt;
&lt;/div&gt;</summary>
		<author><name>114.108.98.54</name></author>
	</entry>
</feed>