텐서보드 코드 분리, 자동 미분 코드 분리,

This commit is contained in:
rickiepark
2018-05-20 15:26:35 +09:00
parent 6935967596
commit e56cdda3e2
3 changed files with 1218 additions and 495 deletions

View File

@@ -1414,51 +1414,23 @@
"metadata": {},
"source": [
"# 그래프 시각화\n",
"## 쥬피터에서"
"## 쥬피터 노트북안에서"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"주피터 노트북에서 그래프를 나타내기 위해 https://tensorboard.appspot.com/ 에 서비스 중인 텐서보드 서버를 사용하겠습니다(즉, 인터넷 연결이 안되면 작동되지 않습니다). 제가 아는 한 이 코드는 Alex Mordvintsev가 [딥드림 튜토리얼](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/deepdream/deepdream.ipynb)에서 처음 사용했습니다. 또는 [tfgraphviz](https://github.com/akimach/tfgraphviz)를 사용할 수도 있습니다."
]
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import clear_output, Image, display, HTML\n",
"\n",
"def strip_consts(graph_def, max_const_size=32):\n",
" \"\"\"graph_def에서 큰 상수 값은 제외시킵니다.\"\"\"\n",
" strip_def = tf.GraphDef()\n",
" for n0 in graph_def.node:\n",
" n = strip_def.node.add() \n",
" n.MergeFrom(n0)\n",
" if n.op == 'Const':\n",
" tensor = n.attr['value'].tensor\n",
" size = len(tensor.tensor_content)\n",
" if size > max_const_size:\n",
" tensor.tensor_content = b\"<stripped %d bytes>\"%size\n",
" return strip_def\n",
"\n",
"def show_graph(graph_def, max_const_size=32):\n",
" \"\"\"텐서플로 그래프 나타내기\"\"\"\n",
" if hasattr(graph_def, 'as_graph_def'):\n",
" graph_def = graph_def.as_graph_def()\n",
" strip_def = strip_consts(graph_def, max_const_size=max_const_size)\n",
" code = \"\"\"\n",
" <script>\n",
" function load() {{\n",
" document.getElementById(\"{id}\").pbtxt = {data};\n",
" }}\n",
" </script>\n",
" <link rel=\"import\" href=\"https://tensorboard.appspot.com/tf-graph-basic.build.html\" onload=load()>\n",
" <div style=\"height:600px\">\n",
" <tf-graph-basic id=\"{id}\"></tf-graph-basic>\n",
" </div>\n",
" \"\"\".format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))\n",
"\n",
" iframe = \"\"\"\n",
" <iframe seamless style=\"width:1200px;height:620px;border:0\" srcdoc=\"{}\"></iframe>\n",
" \"\"\".format(code.replace('\"', '&quot;'))\n",
" display(HTML(iframe))"
"from tensorflow_graph_in_jupyter import show_graph"
]
},
{
@@ -2192,469 +2164,14 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## 자작 계산 그래프 구현하기"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"f(x,y) = ((x) * (x)) * (y) + y + 2\n",
"f(3,4) = 42\n"
]
}
],
"source": [
"class Const(object):\n",
" def __init__(self, value):\n",
" self.value = value\n",
" def evaluate(self):\n",
" return self.value\n",
" def __str__(self):\n",
" return str(self.value)\n",
"\n",
"class Var(object):\n",
" def __init__(self, init_value, name):\n",
" self.value = init_value\n",
" self.name = name\n",
" def evaluate(self):\n",
" return self.value\n",
" def __str__(self):\n",
" return self.name\n",
"\n",
"class BinaryOperator(object):\n",
" def __init__(self, a, b):\n",
" self.a = a\n",
" self.b = b\n",
"\n",
"class Add(BinaryOperator):\n",
" def evaluate(self):\n",
" return self.a.evaluate() + self.b.evaluate()\n",
" def __str__(self):\n",
" return \"{} + {}\".format(self.a, self.b)\n",
"\n",
"class Mul(BinaryOperator):\n",
" def evaluate(self):\n",
" return self.a.evaluate() * self.b.evaluate()\n",
" def __str__(self):\n",
" return \"({}) * ({})\".format(self.a, self.b)\n",
"\n",
"x = Var(3, name=\"x\")\n",
"y = Var(4, name=\"y\")\n",
"f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n",
"print(\"f(x,y) =\", f)\n",
"print(\"f(3,4) =\", f.evaluate())"
"## Autodiff"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 그래디언트 계산\n",
"### 수동 미분"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"df/dx(3,4) = 24\n",
"df/dy(3,4) = 10\n"
]
}
],
"source": [
"df_dx = Mul(Const(2), Mul(x, y)) # df/dx = 2xy\n",
"df_dy = Add(Mul(x, x), Const(1)) # df/dy = x² + 1\n",
"print(\"df/dx(3,4) =\", df_dx.evaluate())\n",
"print(\"df/dy(3,4) =\", df_dy.evaluate())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 수치 미분"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"df/dx(3,4) = 24.000400000048216\n",
"df/dy(3,4) = 10.000000000047748\n"
]
}
],
"source": [
"def gradients(func, vars_list, eps=0.0001):\n",
" partial_derivatives = []\n",
" base_func_eval = func.evaluate()\n",
" for var in vars_list:\n",
" original_value = var.value\n",
" var.value = var.value + eps\n",
" tweaked_func_eval = func.evaluate()\n",
" var.value = original_value\n",
" derivative = (tweaked_func_eval - base_func_eval) / eps\n",
" partial_derivatives.append(derivative)\n",
" return partial_derivatives\n",
"\n",
"df_dx, df_dy = gradients(f, [x, y])\n",
"print(\"df/dx(3,4) =\", df_dx)\n",
"print(\"df/dy(3,4) =\", df_dy)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 기호 미분"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"df/dx(3,4) = 24.0\n",
"df/dy(3,4) = 10.0\n"
]
}
],
"source": [
"Const.derive = lambda self, var: Const(0)\n",
"Var.derive = lambda self, var: Const(1) if self is var else Const(0)\n",
"Add.derive = lambda self, var: Add(self.a.derive(var), self.b.derive(var))\n",
"Mul.derive = lambda self, var: Add(Mul(self.a, self.b.derive(var)), Mul(self.a.derive(var), self.b))\n",
"\n",
"x = Var(3.0, name=\"x\")\n",
"y = Var(4.0, name=\"y\")\n",
"f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n",
"\n",
"df_dx = f.derive(x) # 2xy\n",
"df_dy = f.derive(y) # x² + 1\n",
"print(\"df/dx(3,4) =\", df_dx.evaluate())\n",
"print(\"df/dy(3,4) =\", df_dy.evaluate())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 자동 미분 (autodiff) 전진 방식"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [],
"source": [
"class DualNumber(object):\n",
" def __init__(self, value=0.0, eps=0.0):\n",
" self.value = value\n",
" self.eps = eps\n",
" def __add__(self, b):\n",
" return DualNumber(self.value + self.to_dual(b).value,\n",
" self.eps + self.to_dual(b).eps)\n",
" def __radd__(self, a):\n",
" return self.to_dual(a).__add__(self)\n",
" def __mul__(self, b):\n",
" return DualNumber(self.value * self.to_dual(b).value,\n",
" self.eps * self.to_dual(b).value + self.value * self.to_dual(b).eps)\n",
" def __rmul__(self, a):\n",
" return self.to_dual(a).__mul__(self)\n",
" def __str__(self):\n",
" if self.eps:\n",
" return \"{:.1f} + {:.1f}ε\".format(self.value, self.eps)\n",
" else:\n",
" return \"{:.1f}\".format(self.value)\n",
" def __repr__(self):\n",
" return str(self)\n",
" @classmethod\n",
" def to_dual(cls, n):\n",
" if hasattr(n, \"value\"):\n",
" return n\n",
" else:\n",
" return cls(n)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"$3 + (3 + 4 \\epsilon) = 6 + 4\\epsilon$"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"6.0 + 4.0ε"
]
},
"execution_count": 97,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"3 + DualNumber(3, 4)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"$(3 + 4ε)\\times(5 + 7ε) = 3 \\times 5 + 3 \\times 7ε + 4ε \\times 5 + 4ε \\times 7ε = 15 + 21ε + 20ε + 28ε^2 = 15 + 41ε + 28 \\times 0 = 15 + 41ε$"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"15.0 + 41.0ε"
]
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"DualNumber(3, 4) * DualNumber(5, 7)"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"42.0"
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x.value = DualNumber(3.0)\n",
"y.value = DualNumber(4.0)\n",
"\n",
"f.evaluate()"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [],
"source": [
"x.value = DualNumber(3.0, 1.0) # 3 + ε\n",
"y.value = DualNumber(4.0) # 4\n",
"\n",
"df_dx = f.evaluate().eps\n",
"\n",
"x.value = DualNumber(3.0) # 3\n",
"y.value = DualNumber(4.0, 1.0) # 4 + ε\n",
"\n",
"df_dy = f.evaluate().eps"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"24.0"
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_dx"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10.0"
]
},
"execution_count": 102,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_dy"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 자동 미분 후진 방식"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"f(x,y) = ((x) * (x)) * (y) + y + 2\n",
"f(3,4) = 42\n",
"df_dx = 24.0\n",
"df_dy = 10.0\n"
]
}
],
"source": [
"class Const(object):\n",
" def __init__(self, value):\n",
" self.value = value\n",
" def evaluate(self):\n",
" return self.value\n",
" def backpropagate(self, gradient):\n",
" pass\n",
" def __str__(self):\n",
" return str(self.value)\n",
"\n",
"class Var(object):\n",
" def __init__(self, init_value, name):\n",
" self.value = init_value\n",
" self.name = name\n",
" self.gradient = 0\n",
" def evaluate(self):\n",
" return self.value\n",
" def backpropagate(self, gradient):\n",
" self.gradient += gradient\n",
" def __str__(self):\n",
" return self.name\n",
"\n",
"class BinaryOperator(object):\n",
" def __init__(self, a, b):\n",
" self.a = a\n",
" self.b = b\n",
"\n",
"class Add(BinaryOperator):\n",
" def evaluate(self):\n",
" self.value = self.a.evaluate() + self.b.evaluate()\n",
" return self.value\n",
" def backpropagate(self, gradient):\n",
" self.a.backpropagate(gradient)\n",
" self.b.backpropagate(gradient)\n",
" def __str__(self):\n",
" return \"{} + {}\".format(self.a, self.b)\n",
"\n",
"class Mul(BinaryOperator):\n",
" def evaluate(self):\n",
" self.value = self.a.evaluate() * self.b.evaluate()\n",
" return self.value\n",
" def backpropagate(self, gradient):\n",
" self.a.backpropagate(gradient * self.b.value)\n",
" self.b.backpropagate(gradient * self.a.value)\n",
" def __str__(self):\n",
" return \"({}) * ({})\".format(self.a, self.b)\n",
"\n",
"x = Var(3, name=\"x\")\n",
"y = Var(4, name=\"y\")\n",
"f = Add(Mul(Mul(x, x), y), Add(y, Const(2))) # f(x,y) = x²y + y + 2\n",
"\n",
"result = f.evaluate()\n",
"f.backpropagate(1.0)\n",
"\n",
"print(\"f(x,y) =\", f)\n",
"print(\"f(3,4) =\", result)\n",
"print(\"df_dx =\", x.gradient)\n",
"print(\"df_dy =\", y.gradient)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 자동 미분 후진 모드 (텐서플로를 사용해서)"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(42.0, [24.0, 10.0])"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reset_graph()\n",
"\n",
"x = tf.Variable(3., name=\"x\")\n",
"y = tf.Variable(4., name=\"y\")\n",
"f = x*x*y + y + 2\n",
"\n",
"gradients = tf.gradients(f, [x, y])\n",
"\n",
"init = tf.global_variables_initializer()\n",
"\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" f_val, gradients_val = sess.run([f, gradients])\n",
"\n",
"f_val, gradients_val"
"노트: 자동 미분 내용은 [extra_autodiff.ipynb](extra_autodiff.ipynb) 노트북으로 옮겨졌습니다."
]
},
{

1156
extra_autodiff.ipynb Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,50 @@
from __future__ import absolute_import, division, print_function, unicode_literals
# This module defines the show_graph() function to visualize a TensorFlow graph within Jupyter.
# As far as I can tell, this code was originally written by Alex Mordvintsev at:
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/deepdream/deepdream.ipynb
# The original code only worked on Chrome (because of its use of <link rel="import"...>), but the
# version below uses a polyfill (copied from this StackOverflow answer: https://stackoverflow.com/a/41463991/38626)
# so that it works on other browsers as well.
import numpy as np
import tensorflow as tf
from IPython.display import clear_output, Image, display, HTML
def strip_consts(graph_def, max_const_size=32):
    """Return a copy of `graph_def` with oversized constant payloads removed.

    Large `Const` node tensors bloat the serialized graph sent to the browser
    for rendering, so any constant whose raw `tensor_content` exceeds
    `max_const_size` bytes is replaced by a short placeholder byte string.
    The input `graph_def` itself is left untouched.
    """
    stripped = tf.GraphDef()
    for source_node in graph_def.node:
        clone = stripped.node.add()
        clone.MergeFrom(source_node)
        if clone.op != 'Const':
            continue
        tensor = clone.attr['value'].tensor
        content_size = len(tensor.tensor_content)
        if content_size > max_const_size:
            # Placeholder keeps the node valid while dropping the heavy payload.
            tensor.tensor_content = b"<stripped %d bytes>"%content_size
    return stripped
def show_graph(graph_def, max_const_size=32):
    """Render a TensorFlow graph inline in a Jupyter notebook.

    Accepts either a `tf.Graph` (converted via `as_graph_def()`) or a
    `GraphDef` proto. Large constants are stripped first (see `strip_consts`)
    so the HTML payload stays small, then the graph is displayed through the
    TensorBoard graph widget hosted at tensorboard.appspot.com, embedded in
    an iframe. NOTE(review): this requires an internet connection to render.
    """
    if hasattr(graph_def, 'as_graph_def'):
        # A live tf.Graph was passed in; reduce it to its GraphDef proto.
        graph_def = graph_def.as_graph_def()
    stripped = strip_consts(graph_def, max_const_size=max_const_size)
    # Randomized element id lets multiple graphs coexist in one notebook.
    element_id = 'graph'+str(np.random.rand())
    code = """
        <script src="//cdnjs.cloudflare.com/ajax/libs/polymer/0.3.3/platform.js"></script>
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(stripped)), id=element_id)

    # Quotes must be entity-escaped so the document survives inside srcdoc="...".
    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(code.replace('"', '&quot;'))
    display(HTML(iframe))