1
!"#$!%
School&of&
Computer&Science
&'()*(+,-./01/201345'-/60).3'7*3(0)
85+9-:7*-
Josh&Bloch 6;5',(-/<5''0:
2
!"#$!%
=:>()(.3'(?(5
6;5',(-/5):/;(./4(1-/@-../;5:/5/A5AB/C(',D
E-'/)5>-/(. /F,04-)
E0>-40'G/"*/:7-/30)(C;3
H73/B07/*5)/37')/(3/()/7)3(,/I'(:5B
E0>-40'G/J/*0>()C/.00)/30/5)/K)3-')-3/)-5'/B07
H73/)03/7)3(,/513-'/L;5)G.C(?()C
E5?-/5/;5++B/5):/'-,5M()C/L;5)G.C(?()C
3
!"#$!%
N-B/*0)*-+3./1'0>/L;7'.:5B
O-340'G/+'0C'5>>()C/()/@5?5/(./+'-33B/.3'5(C;310'45':
N-B/*,5..-./10'/L6&P/InetAddressQ/SocketQ/ServerSocket
R(.3'(A73-:/.B.3->./+'0?(:-/.*5,5A(,(3B/5):/'-,(5A(,(3B/
H73/3;-B/5,.0/+'0?(:-/*0>+,-M(3B/5):/;-5:5*;-.
4
!"#$!%
=)*(-)3/;(.30'B/01/*0>+73()C
!S"T./U !SJT./U H53*;/&'0*-..()C/U O03/()3-'5*3(?-Q/1(M-:/,(1-3(>-
V53-#!SJT./U !SWT./U L(>-#.;5'()C/U K)3-'5 *3(?-Q/5'A(3'5'B/,(1-3(>-
K)3-'5*3(?(3B/45./5/major 5:?5)*-
V-:/30/&6/'-?0,73(0)Q/4;(*;/,-:/30/K)3-')-3/'-?0,73(0)
H73/A53*;/+'0*-..()C/)-?-'/4-)3/545B
L;-/0'(C()5,/hardware >0:-,/+-'.(.3-:/()30/3;-/!SXT.
60>+7353(0)5,/+5'5:(C>/(./.3(,,/5)/(>+0'35)3/)(*;-
5
!"#$!%
O-*-..(3B/(./3;-/>03;-'/01/()?-)3(0)
K)/<00C,-Y./-5',B/:5B.Q/;7):'-:./01/big-data&batch&computations
R535/.-3./U *'54,-://4-A/:0*7>-)3.Q/4-A/'-Z7-.3/,0C.Q/-3*[
60>+7353(0)./U ()?-'3-:/():(*-.Q/+5C-/'5)GQ/Z7- 'B/+0+7,5'(3BQ/-3*[
=/*'(3(*5,/+5'3/01/3;-/indexing+pipeline
&5'5,,-,(.>/'-Z7('-:/30/1()(.;/()/5/'-5.0)5A,-/5>07)3/01/3(>-/
F5*;/*0>+7353(0)/;5:/(3./04)/+'0C'5>
E5':/+5'3.P
R(.3'(A73-/:535
&5'5,,-,(\-/*0>+7353(0).
H5,5)*-/,05:
L0,-'53-/-''0'./]<00C,-/7.-:/*;-5+/*0>>0:(3B/;5':45'-^
_A.*7'-:/.(>+,(*(3B/01/7):-',B()C/*0>+7353(0)
6
!"#$!%
K)?-)3(0)/
@-11/R-5)/`/25)a5B/<;->5453 '-5,(\-:/+'0A,->./;5:/*0>>0)/.3'7*37'-
L;-B/4'03-/1' 5>-40'G/3;53/*5+37'-:/.3'7*37'- `/5730>53-:/;5':/+5'3.
R535 /+5 '5,,-,(\53(0)Q/15 7,3/30,-'5)*-Q/:535/:(.3'(A73(0)Q/,05:/A5,5)*( )CQ/-3*[
L;-/A5.(*/5A.3'5*3(0)P
![ &- ' 10'>/3;-/.5>-/*0>+7353(0)/0)/5,,/G-B#?5,7-/+5('./()/+5'5,,-,/]Map^
$[ 8-'C-/3;-/'-.7,3./7.()C/5/.+-*(1 (-:/*0>+7353(0)/]Reduce^
60)*-+3./A0''04-:/1'0>/17)*3(0)5,/+'0C'5>>()C
I7)*3(0)5, /)537'-/01/*0>+7353(0)/?5.3,B/.(>+,(1(-:/157,3/30,-'5)*-
=):/85+9-:7*-/45./A0')
7
!"#$!%
85+9-:7*-
= '0A7.3Q/.*5,5A,-/1'5>-40'G/10'/:(.3'(A7 3-:/*0>+7353(0)/0)/
'-+,(*53-:Q/+5'3(3(0)-:/:535
client
client
k-z:
{pete:12,
reif:42}
Worker 1
a-c:
{alice:90,
bob:42,
cohen:9}
Worker 2
d-g:
{deb:16}
h-j:{ }
Worker 3
{a-c:[2],
d-g:[3,4],
h-j:[3],
k-z:[1]}
Master
8
!"#$!%
85+/1'0>/5/17)*3(0)5,/+-'.+-*3(?-
map(f, x[0…n-1])
=++,B/3;-/17)*3(0)/f 30/-5*;/-,->-)3/01/,(.3/x
-[C[Q/()/&B 3;0)P
def square(x): return x*x
map(square, [1, 2, 3, 4]) 407,:/'-37')/[1, 4, 9, 16]
&5'5,,-,/>5+/(>+,->-)353(0)/(./3'(?(5,
b;53/(./3;-/40'Gc//b;53/(./3;-/:-+3;/].+5)^c
9
!"#$!%
9-:7*-/1'0>/5/17)*3(0)5,/+-'.+-*3(?-
reduce(f, x[0…n-1])
9-+-53-:,B/5++,B/A()5'B/17)*3 (0)/f 30/+5('./01/(3->./()/xQ/'-+,5*()C/
3;-/+5('/01/(3->./4(3;/3;-/'-.7,3/7)3(,/0),B/0)-/(3->/'->5().
_)-/.-Z7-)3(5,/&B3;0)/(>+,->-)353(0)P
def reduce(f, x):
if len(x) == 1: return x[0]
return reduce(f, [f(x[0],x[1])] + x[2:])
-[C[Q/()/& B 3;0)P
def add(x,y): return x+y
reduce(add, [1,2,3,4])
407,:/'-37')/!T/5.
reduce(add, [1,2,3,4])
reduce(add, [3,3,4])
reduce(add, [6,4])
reduce(add, [10]) -> 10
10
!"#$!%
9-:7*-/4(3;/5)/5..0*(53(?-/A()5'B/17)*3(0)
K1/3;-/17)*3(0)/f (./5..0*(53(?-Q/3;-/0': -'/f (./5++,(-:/:0-./)03/
511-*3/3;-/'-.7,3
!//d/]]$de^/d/%^/ !/d/]$/d/]ed%^^// ]!d$^/d/]ed%^
&5'5,,-,/'-:7*-/(>+,->-)353(0)/(./5,.0/-5.B
b;53/(./3;-/40'Gc//b;53/(./3;-/:-+3;/].+5)^c
11
!"#$!%
R(.3'(A73-:/85+9-:7*-
R(.3'(A73-:/85+9-:7*- (./.(>(,5'/30/]A73/)03/3;-/.5>-/5.D^P
reduce(f2, map(f1, x))
N-B/(:-5P f:535#*-)3'(*g/5'*;(3-*37'-
2-):/17)*3( 0)/f1 :('-*3,B/30/3;-/:535
FM-*73-/(3/*0)*7''-)3,B
L;-)/>-'C- /'-.7,3./4(3 ;/'-:7*-
=,.0/*0)*7''-)3,B
client
client
k-z:
{pete:12,
reif:42}
Worker 1
a-c:
{alice:90,
bob:42,
cohen:9}
Worker 2
d-g:
{deb:16}
h-j:{ }
Worker 3
{a-c:[2],
d-g:[3,4],
h-j:[3],
k-z:[1]}
Master
12
!"#$!%
85+9-:7*- 4(3;/G-Bh?5,7-/+5('./] <00C,-/.3B,-^
85.3-'
=..(C)/35.G./30/40'G-'.
&()C/40'G-'./30/3-.3/10'/15(,7'-.
85+/40'G-'.
85+/10'/-5*;/G-Bh?5,7- /+5('
F>(3/()3-'>-:(53-/G-Bh?5,7-/+5 ('.
9-:7*- /40'G-'.
20'3/:535/AB/()3-' > -:(53-/G-B/5):/
5CC'-C53-/AB/G-B
9-:7*-/10'/-5*;/G-B
The shuffle:
13
!"#$!%
i I0'/-5*;/40':/0)/3;-/b-AQ/*07)3/3;-/)7>A-'/01/0**7''-)*-.
§ I0'/85+P//key1 (./5/:0*7>-)3/)5>-Q/value (./(3./*0)3-)3.
§ I0'/9-:7* -P//key2 (./5/40':Q/values (./5/,(.3/01/3;-/)7>A-'/01/*07)3./
01/3;53/40':
85+9-:7*- 4(3;/G-Bh?5,7-/+5('./] <00C,-/.3B,-^
f1(String key1, String value):
for each word w in value:
EmitIntermediate(w, 1);
f2(String key2, Iterator values):
int result = 0;
for each v in values:
result += v;
Emit(key2, result);
Map: (key1, v1) à (key2, v2)* Reduce: (key2, v2*) à (key3, v3)*
MapReduce: (key1, v1)* à (key3, v3)*
MapReduce: (docName, docText)* à (word, wordCount)*
14
!"#$!%
85+9-:7*- 5'*;(3-*37'5,/:-35(,.
j.75,,B/()3-C'53-:/4(3;/:(.3'(A73-:/.30'5C-/.B.3->
85+/40'G-'/-M-*73-./17)*3(0)/0)/(3./.;5'-/01/3;-/:535
85+/073+73/7.75,,B/4'(33-)/30/40'G-'Y./,0*5,/:(.G
2;711,-P/'-:7*-/40'G-'/013-)/+7,,./()3-'>-:(53-/:535/1'0>/>5+/40'G-'Y./
,0*5,/:(.G
9-:7*- /073+73/7.75,,B/4'(33-)/A5*G/30/:(.3'(A73-:/.30'5C-/.B.3->
client
client
k-z:
{pete:12,
reif:42}
Worker 1
a-c:
{alice:90,
bob:42,
cohen:9}
Worker 2
d-g:
{deb:16}
h-j:{ }
Worker 3
{a-c:[2],
d-g:[3,4],
h-j:[3],
k-z:[1]}
Master
15
!"#$!%
E5):,()C/.-'?-'/15(,7'-./4(3;/85+9-:7*-
85+/40'G-'/15(,7'-P
9-#>5+/7.()C/'-+,(*5/01/3;-/.30'5C-/.B.3->/:535
9-:7*- /40'G-'/15(,7 '-P
O-4/'-:7* -/40'G-'/*5)/+7,,/()3-'>-:(53-/:535/1'0>/>5+/40'G-'Y./
,0*5,/:(.GQ/'-#'-:7*-
85.3-'/15(,7'-P
_+3(0).P
9-.35 ' 3/.B.3->/7.()C/)-4/>5.3-'
9-+,(*53-/>5.3-'
client
client
k-z:
{pete:12,
reif:42}
Worker 1
a-c:
{alice:90,
bob:42,
cohen:9}
Worker 2
d-g:
{deb:16}
h-j:{ }
Worker 3
{a-c:[2],
d-g:[3,4],
h-j:[3],
k-z:[1]}
Master
16
!"#$!%
L;-/A-573B/01/85+9-:7*-
V04/*0>>7)(*53(0)/*0.3./]7.75,,B^
L;-/.;711,-/]A-34--)/>5+/5):/'-:7*-^/*5)/A-/-M+-).(?-
85+9-:7*- *5)/A-/(3-'53 -:
K)+73/30/85+9-:7*-P //////G-Bh?5,7-/+5('./()/3;-/:(.3'(A73-:/.30'5C-/.B.3->
_73+73/1'0>/85+9-:7*-P//G-Bh?5,7-/+5( './()/3;-/:(.3'(A73-:/.30'5C-/.B.3- >
client
client
k-z:
{pete:12,
reif:42}
Worker 1
a-c:
{alice:90,
bob:42,
cohen:9}
Worker 2
d-g:
{deb:16}
h-j:{ }
Worker 3
{a-c:[2],
d-g:[3,4],
h-j:[3],
k-z:[1]}
Master
17
!"#$!%
i I0'/-5*;/+5('/01/+-0+,-/()/5/.0*(5,/)-340'GQ/*07)3/>7375,/1'(-):.
§ I0'/85+P//key1 (./5/+-'.0)Q/value (./3;-/,(.3/01/3;-('/1'(-):.
§ I0'/9-:7* -P//key2 (./cccQ/values (. /5/,(.3/01/ccc
85+9-:7*- 30/*07)3/>7375,/1'(-):.
f1(String key1, String value): f2(String key2, Iterator values):
MapReduce: (person, friends)* à (pair of people, count of mutual friends)*
18
!"#$!%
i I0'/-5*;/+5('/01/+-0+,-/()/5/.0*(5,/)-340'GQ/*07)3/>7375,/1'(-):.
§ I0'/85+P//key1 (./5/+-'.0)Q/value (./3;-/,(.3/01/3;-('/1'(-):.
§ I0'/9-:7* -P//key2 (./5/+5('/01/+-0+,-Q/values (./5/,(.3/01/!.Q/10'/-5*;/
>7375,/1'(-):/3;53/+5('/;5.
85+9-:7*- 30/*07)3/>7375,/1'(-):.
f1(String key1, String value):
for each pair of friends
in value:
EmitIntermediate(pair, 1);
f2(String key2, Iterator values):
int result = 0;
for each v in values:
result += v;
Emit(key2, result);
MapReduce: (person, friends)* à (pair of people, count of mutual friends)*
19
!"#$!%
i I0'/-5*;/+5C-/0)/b-AQ/*07)3/)7>A-'/01/+5C-./3;53/,()G/30/(3
§ I0'/85+P//key1 (./5/:0*7>-)3/)5>-Q/value (./3;-/*0)3-)3./01/3;53/
:0*7>-)3
§ I0'/9-:7* -P//key2 (./cccQ/values (. /5/,(.3/01/ccc
85+9-:7*- 30/*07)3/()*0>()C/,()G.
f1(String key1, String value): f2(String key2, Iterator values):
MapReduce: (docName, docText)* à (docName, number of incoming links)*
20
!"#$!%
i I0'/-5*;/+5C-/0)/b-AQ/*07)3/)7>A-'/01/+5C-./3;53/,()G/30/(3
§ I0'/85+P//key1 (./5/:0*7>-)3/)5>-Q/value (./3;-/*0)3-)3./01/3;53/
:0*7>-)3
§ I0'/9-:7* -P//key2 (./,()GQ/values (./5/,(.3/01/!.
85+9-:7*- 30/*07)3/()*0>()C/,()G.
f1(String key1, String value):
for each link in value:
EmitIntermediate(link, 1)
f2(String key2, Iterator values):
int result = 0;
for each v in values:
result += v;
Emit(key2, result);
MapReduce: (docName, docText)* à (docName, number of incoming links)*
21
!"#$!%
i I0'/-5*;/+5C-/0)/3;-/b-AQ/,(.3/3;-/+5C-./3;53/,()G/30/(3
§ I0'/85+P//key1 (./5/:0*7>-)3/)5>-Q/value (./3;-/*0)3-)3./01/3;53/
:0*7>-)3
§ I0'/9-:7* -P//key2 (./cccQ/values (. /5/,(.3/01/ccc
85+9-:7*- 30/*'-53-/5)/()?-'3-:/():-M
f1(String key1, String value):
for each link in value:
EmitIntermediate(link, key1)
f2(String key2, Iterator values):
Emit(key2, values)
MapReduce: (docName, docText)* à (docName, list of incoming links)*
22
!"#$!%
i I0'/-5*;/+5('/01/+-0+,-/()/5/.0*(5,/)-340'GQ/,(.3/>7375,/1'(-):.
§ I0'/85+P//key1 (./5/+-'.0)Q/value (./3;-/,(.3/01/3;-('/1'(-):.
§ I0'/9-:7* -P//key2 (./cccQ/values (. /5/,(.3/01/ccc
V(.3/3;-/>7375,/1'(-):.
f1(String key1, String value): f2(String key2, Iterator values):
MapReduce: (person, friends)* à (pair of people, list of mutual friends)*
23
!"#$!%
i I0'/-5*;/+5('/01/+-0+,-/()/5/.0*(5,/)-340'GQ/,(.3/>7375,/1'(-):.
§ I0'/85+P//key1 (./5/+-'.0)Q/value (./3;-/,(.3/01/3;-('/1'(-):.
§ I0'/9-:7* -P//key2 (. /5/+5('/01/+-0+,-Q/values (. /5/,(.3/01/3;-('/>7375,/
1'(-):.
V(.3/3;-/>7375,/1'(-):.
f1(String key1, String value):
for each pair of friends
in value:
EmitIntermediate(pair, key1);
f2(String key2, Iterator values):
Emit(key2, values)
MapReduce: (person, friends)* à (pair of people, list of mutual friends)*
24
!"#$!%
i I0'/-5*;/+-'.0)/()/5/.0*(5,/)-340'GQ/*07)3/3;-('/1'(-): ./5):/
1'(-):./01/1'(-):.
§ I0'/85+P//key1 (./5/+-'.0)Q/value (./3;-/,(.3/01/3;-('/1'(-):.
§ I0'/9-:7* -P//key2 (./cccQ/values (. /5/,(.3/01/ccc
607)3/1'(-):./d/1'(-):./01/1'(-):.
f1(String key1, String value): f2(String key2, Iterator values):
MapReduce: (person, friends)* à (person, count of f + fof)*
25
!"#$!%
i I0'/-5*;/+-'.0)/()/5/.0*(5,/)-340'GQ/*07)3/3;-('/1'(-): ./5):/
1'(-):./01/1'(-):.
§ I0'/85+P//key1 (./5/+-'.0)Q/value (./3;-/,(.3/01/3;-('/1'(-):.
§ I0'/9-:7* -P//key2 (./cccQ/values (. /5/,(.3/01/ccc
607)3/1'(-):./d/1'(-):./01/1'(-):.
f1(String key1, String value):
for each friend1 in value:
EmitIntermediate(friend1, key1)
for each friend2 in value:
EmitIntermediate(friend1,
friend2);
f2(String key2, Iterator values):
distinct_values = {}
for each v in values:
if not v in distinct_values:
distinct_values.insert(v)
Emit(key2, len(distinct_values))
MapReduce: (person, friends)* à (person, count of f + fof)*
26
!"#$!%
i I0'/-5*;/+-'.0)/()/5/.0*(5,/)-340'GQ/*07)3/3;-('/1'(-): ./5):/
1'(-):./01/1'(-):./5):/1'(-):./01/1'(-):./01/1'(-):.
§ I0'/85+P//key1 (./5/+-'.0)Q/value (./3;-/,(.3/01/;-'/1'(-):.
§ I0'/9-:7* -P//key2 (./cccQ/values (. /5/,(.3/01/ccc
I'(-):./d/1'(-):./01/1'(-): ./d/1'(-):./01/1'(-):./01/1'(-):.
f1(String key1, String value): f2(String key2, Iterator values):
MapReduce: (person, friends)* à (person, count of f + fof + fofof)*
27
!"#$!%
&'0A,->P//E04/30/'-5*;/:(.35)*-/e/)0:-.c
20,73(0)P//K3-'53(?-/85+9-:7*-
j.-/85+9-:7*- 30/C-3/:(.35)*-/!/5):/:(.35)*-/$/)0:-.
I--:/'-.7,3./5./( )+73/30/5/.-*0):/85+9-:7*- +'0*-..
=,.0/*0).(:-'P
H'-5:3;#1('.3/.-5'*;
&5C-95)G
28
!"#$!%
R5351,04/+'0*-..()C
E(C;#,-?- ,/,5)C75C-./5):/.B.3->./10'/*0>+,-M/85+9-:7*-#,(G-/
+'0*-..()C
<00C,-/U I,7>-@5?5Q/8(,,4;--,
k5;00/U &(CQ/E(?-
8(*'0.013/U R'B5:Q/O5(5:
29
!"#$!%
&0.3.*'(+3
85+9-:7*-/+5'5:(C>/;5./A-*0>-/+-'?5.(?-/0?-'/3;-/+5.3/:-*5:-
E5:00+/(./A-.3/G)04)/(>+,->-)353(0)
H73/85+9-:7*-/(./)0/,0)C-'/7.-:/()/<00C,-l./():-M()C/+(+-,()-D
b;Bc
30
!"#$!%
27>>5'B
85+9-:7*- /(./5/+04-'17,/:(.3'(A73-:/+'0*-..()C/1'5>-40'G.
85G-./(3/-5.B/30/:0/157,3#30,-'5)3/4-A#.*5,-/:535/+'0*-..()C
E(:-./5,,/3;-/;5':/+5'3./U +'0C'5>>()C/>0:-,/(./.(>+,-
&5'5:(C>/;5./A-*0>-/+-'?5.(?-
k07l,,/C-3/5/*;5)*-/30/(>+,->-)3/(3/()/B07'.-,1/E0>-40'G/J
_'(C()5,/+5+-'/]$TT%^P/;33+.Phh.353(*[C00C,-7.- '*0)3-)3[*0>h>-:(5h'-.-5'*;[C00C,-[*0>h-)hh5'*;(?-h>5+'-:7*-#0.:(T%[+:1
6=68/+5+-'/]$TTX^P/;33+Phh*07'.-.[45.;()C30)[-:7h()10%%"h:0*.h+!TW#:-5)[+:1