#### Part 4: Python lambda expressions¶

In [1]:
labVersion = 'cs190_week1_v_1_2'


### Part 1: Math review ¶

#### Calculate the value of $\mathbf{y}$: $$\mathbf{y} = 2 \begin{bmatrix} 2 \\ 4 \\ 8 \end{bmatrix}$$

In [2]:
# TODO: Replace <FILL IN> with appropriate code
# Manually calculate your answer and represent the vector as a list of integer values.
# For example, [2, 4, 8].
x = [3, -6, 0]
# y is the scalar 2 times the vector [2, 4, 8], worked out by hand.
y = [4, 8, 16]

In [3]:
# TEST Scalar multiplication: vectors (1a)
# Import test library
from test_helper import Test
Test.assertEqualsHashed(x, 'e460f5b87531a2b60e0f55c31b2e49914f779981',
'incorrect value for vector x')
Test.assertEqualsHashed(y, 'e2d37ff11427dbac7f833a5a7039c0de5a740b1e',
'incorrect value for vector y')

1 test passed.
1 test passed.


#### Calculate the value of $\mathbf{z}$: $$\mathbf{z} = \begin{bmatrix} 1 \\ 2 \\ 3 \end{bmatrix} \odot \begin{bmatrix} 4 \\ 5 \\ 6 \end{bmatrix}$$

In [4]:
# TODO: Replace <FILL IN> with appropriate code
# Manually calculate your answer and represent the vector as a list of integer values.
# z is the element-wise product of [1, 2, 3] and [4, 5, 6]: (1*4, 2*5, 3*6).
z = [4, 10, 18]

In [5]:
# TEST Element-wise multiplication: vectors (1b)
Test.assertEqualsHashed(z, '4b5fe28ee2d274d7e0378bf993e28400f66205c2',
'incorrect value for vector z')

1 test passed.


#### Calculate the value of $c_2$: $$c_2 = \begin{bmatrix} 3 \\ 4 \\ 5 \end{bmatrix} \cdot \begin{bmatrix} 1 \\ 2 \\ 3 \end{bmatrix}$$

In [6]:
# TODO: Replace <FILL IN> with appropriate code
# Manually calculate your answer and set the variables to their appropriate integer values.
c1 = -11
# c2 is the dot product of [3, 4, 5] and [1, 2, 3]: 3*1 + 4*2 + 5*3.
c2 = 26

In [7]:
# TEST Dot product (1c)
Test.assertEqualsHashed(c1, '8d7a9046b6a6e21d66409ad0849d6ab8aa51007c', 'incorrect value for c1')
Test.assertEqualsHashed(c2, '887309d048beef83ad3eabf2a79a64a389ab1c9f', 'incorrect value for c2')

1 test passed.
1 test passed.


#### Calculate the value of $\mathbf{Y}$: $$\mathbf{Y} = \begin{bmatrix} 1 \\ 2 \\ 3 \end{bmatrix} \begin{bmatrix} 1 & 2 & 3 \end{bmatrix}$$

In [12]:
# TODO: Replace <FILL IN> with appropriate code
# Represent matrices as lists within lists. For example, [[1,2,3], [4,5,6]] represents a matrix with
# two rows and three columns. Use integer values.
X = [[22, 28], [49, 64]]
# Y is the column vector [1, 2, 3] times the row vector [1, 2, 3]; entry (i, j) is i * j.
Y = [[1, 2, 3], [2, 4, 6], [3, 6, 9]]

In [13]:
# TEST Matrix multiplication (1d)
'incorrect value for matrix X')
Test.assertEqualsHashed(Y, 'f985daf651531b7d776523836f3068d4c12e4519',
'incorrect value for matrix Y')

1 test passed.
1 test passed.


### Part 2: NumPy ¶

#### Note that if you create an array from a Python list of integers you will obtain a one-dimensional array, which is equivalent to a vector for our purposes.¶

In [15]:
# It is convention to import NumPy with the alias np
import numpy as np

In [16]:
# TODO: Replace <FILL IN> with appropriate code
# Create a numpy array with the values 1, 2, 3
simpleArray = np.array([1, 2, 3])
# Perform the scalar product of 5 and the numpy array
timesFive = simpleArray * 5
print simpleArray
print timesFive

[1 2 3]
[ 5 10 15]

In [17]:
# TEST Scalar multiplication (2a)
Test.assertTrue(np.all(timesFive == [5, 10, 15]), 'incorrect value for timesFive')

1 test passed.


#### For this exercise, multiply the arrays u and v element-wise and compute their dot product.¶

In [18]:
# TODO: Replace <FILL IN> with appropriate code
# Create a ndarray based on a range and step size.
u = np.arange(0, 5, .5)
v = np.arange(5, 10, .5)

elementWise = u * v
dotProduct = np.dot(u, v)
print 'u: {0}'.format(u)
print 'v: {0}'.format(v)
print '\nelementWise\n{0}'.format(elementWise)
print '\ndotProduct\n{0}'.format(dotProduct)

u: [ 0.   0.5  1.   1.5  2.   2.5  3.   3.5  4.   4.5]
v: [ 5.   5.5  6.   6.5  7.   7.5  8.   8.5  9.   9.5]

elementWise
[  0.     2.75   6.     9.75  14.    18.75  24.    29.75  36.    42.75]

dotProduct
183.75

In [19]:
# TEST Element-wise multiplication and dot product (2b)
Test.assertTrue(np.all(elementWise == [ 0., 2.75, 6., 9.75, 14., 18.75, 24., 29.75, 36., 42.75]),
'incorrect value for elementWise')
Test.assertEquals(dotProduct, 183.75, 'incorrect value for dotProduct')

1 test passed.
1 test passed.


#### For this exercise, multiply $\mathbf{A}$ times its transpose $( \mathbf{A}^\top )$ and then calculate the inverse of the result $( [ \mathbf{A} \mathbf{A}^\top ]^{-1} )$.¶

In [21]:
# TODO: Replace <FILL IN> with appropriate code
from numpy.linalg import inv

A = np.matrix([[1,2,3,4],[5,6,7,8]])
print 'A:\n{0}'.format(A)
# Print A transpose
print '\nA transpose:\n{0}'.format(A.T)

# Multiply A by A transpose
AAt = A * A.T
print '\nAAt:\n{0}'.format(AAt)

# Invert AAt with np.linalg.inv()
AAtInv = np.linalg.inv(AAt)
print '\nAAtInv:\n{0}'.format(AAtInv)

# Show inverse times matrix equals identity
# We round due to numerical precision
print '\nAAtInv * AAt:\n{0}'.format((AAtInv * AAt).round(4))

A:
[[1 2 3 4]
[5 6 7 8]]

A transpose:
[[1 5]
[2 6]
[3 7]
[4 8]]

AAt:
[[ 30  70]
[ 70 174]]

AAtInv:
[[ 0.54375 -0.21875]
[-0.21875  0.09375]]

AAtInv * AAt:
[[ 1.  0.]
[-0.  1.]]

In [22]:
# TEST Matrix math (2c)
Test.assertTrue(np.all(AAt == np.matrix([[30, 70], [70, 174]])), 'incorrect value for AAt')
Test.assertTrue(np.allclose(AAtInv, np.matrix([[0.54375, -0.21875], [-0.21875, 0.09375]])),
'incorrect value for AAtInv')

1 test passed.
1 test passed.


### Part 3: Additional NumPy and Spark linear algebra ¶

#### For this exercise, return the last 3 elements of the array features.¶

In [25]:
# TODO: Replace <FILL IN> with appropriate code
features = np.array([1, 2, 3, 4])
print 'features:\n{0}'.format(features)

# The last three elements of features
lastThree = features[-3:]

print '\nlastThree:\n{0}'.format(lastThree)

features:
[1 2 3 4]

lastThree:
[2 3 4]

In [26]:
# TEST Slices (3a)
Test.assertTrue(np.all(lastThree == [2, 3, 4]), 'incorrect value for lastThree')

1 test passed.


#### Note that the result of stacking two arrays is an ndarray. If you need the result to be a matrix, you can call np.matrix() on the result, which will return a NumPy matrix.¶

In [28]:
# TODO: Replace <FILL IN> with appropriate code
zeros = np.zeros(8)
ones = np.ones(8)
print 'zeros:\n{0}'.format(zeros)
print '\nones:\n{0}'.format(ones)

zerosThenOnes = np.hstack((zeros, ones))   # A 1 by 16 array
zerosAboveOnes = np.vstack((zeros, ones))  # A 2 by 8 array

print '\nzerosThenOnes:\n{0}'.format(zerosThenOnes)
print '\nzerosAboveOnes:\n{0}'.format(zerosAboveOnes)

zeros:
[ 0.  0.  0.  0.  0.  0.  0.  0.]

ones:
[ 1.  1.  1.  1.  1.  1.  1.  1.]

zerosThenOnes:
[ 0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  1.  1.  1.  1.  1.  1.]

zerosAboveOnes:
[[ 0.  0.  0.  0.  0.  0.  0.  0.]
[ 1.  1.  1.  1.  1.  1.  1.  1.]]

In [29]:
# TEST Combining ndarray objects (3b)
Test.assertTrue(np.all(zerosThenOnes == [0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]),
'incorrect value for zerosThenOnes')
Test.assertTrue(np.all(zerosAboveOnes == [[0,0,0,0,0,0,0,0],[1,1,1,1,1,1,1,1]]),
'incorrect value for zerosAboveOnes')

1 test passed.
1 test passed.


#### For this exercise, create a DenseVector consisting of the values [3.0, 4.0, 5.0] and compute the dot product of this vector with numpyVector.¶

In [33]:
from pyspark.mllib.linalg import DenseVector

In [35]:
# TODO: Replace <FILL IN> with appropriate code
numpyVector = np.array([-3, -4, 5])
print '\nnumpyVector:\n{0}'.format(numpyVector)

# Create a DenseVector consisting of the values [3.0, 4.0, 5.0]
myDenseVector = DenseVector([3, 4, 5])
# Calculate the dot product between the two vectors.
denseDotProduct =myDenseVector.dot(numpyVector)

print 'myDenseVector:\n{0}'.format(myDenseVector)
print '\ndenseDotProduct:\n{0}'.format(denseDotProduct)

numpyVector:
[-3 -4  5]
myDenseVector:
[3.0,4.0,5.0]

denseDotProduct:
0.0

In [36]:
# TEST PySpark's DenseVector (3c)
Test.assertTrue(isinstance(myDenseVector, DenseVector), 'myDenseVector is not a DenseVector')
Test.assertTrue(np.allclose(myDenseVector, np.array([3., 4., 5.])),
'incorrect value for myDenseVector')
Test.assertTrue(np.allclose(denseDotProduct, 0.0), 'incorrect value for denseDotProduct')

1 test passed.
1 test passed.
1 test passed.


### Part 4: Python lambda expressions ¶

#### Here are some additional references that explain lambdas: Lambda Functions, Lambda Tutorial, and Python Functions.¶

In [37]:
# Example function
return x + 's'

<type 'function'>
cats

In [38]:
# As a lambda
addSLambda = lambda x: x + 's'

<type 'function'>
<function <lambda> at 0xb0ed98ec>
cats

In [39]:
# TODO: Replace <FILL IN> with appropriate code
# Recall that: "lambda x, y: x + y" creates a function that adds together two numbers
multiplyByTen = lambda x: 10*x
print multiplyByTen(5)

# Note that the function still shows its name as <lambda>
print '\n', multiplyByTen

50

<function <lambda> at 0xb0ed9924>

In [40]:
# TEST Python lambda expressions (4a)
Test.assertEquals(multiplyByTen(10), 100, 'incorrect definition for multiplyByTen')

1 test passed.


#### For this exercise, recreate the def behavior using lambda. Note that since a lambda expression returns a function, it can be used anywhere an object is expected. For example, you can create a list of functions where each function in the list was generated by a lambda expression.¶

In [41]:
# Code using def that we will recreate with lambdas
def plus(x, y):
    """Return x + y."""
    return x + y

def minus(x, y):
    """Return x - y (the second argument subtracted from the first)."""
    return x - y

functions = [plus, minus]
print functions[0](4, 5)
print functions[1](4, 5)

9
-1

In [42]:
# TODO: Replace <FILL IN> with appropriate code
# The first function should add two values, while the second function should subtract the second
# value from the first value.
lambdaFunctions = [lambda x, y: x + y ,  lambda x, y: x - y]
print lambdaFunctions[0](4, 5)
print lambdaFunctions[1](4, 5)

9
-1

In [43]:
# TEST lambda fewer steps than def (4b)
Test.assertEquals(lambdaFunctions[0](10, 10), 20, 'incorrect first lambdaFunction')
Test.assertEquals(lambdaFunctions[1](10, 10), 0, 'incorrect second lambdaFunction')

1 test passed.
1 test passed.


#### For this exercise: you'll create one-parameter functions swap1 and swap2 that swap the order of a tuple; a one-parameter function swapOrder that takes in a tuple with three values and changes the order to: second element, third element, first element; and finally, a three-parameter function sumThree that takes in three tuples, each with two values, and returns a tuple containing two values: the sum of the first element of each tuple and the sum of second element of each tuple.¶

In [44]:
# Examples.  Note that the spacing has been modified to distinguish parameters from tuples.
# NOTE: the "lambda (x0, x1): ..." forms below use tuple parameter unpacking, which is
# Python 2-only syntax; it was removed from the language in Python 3 (PEP 3113).

# One-parameter function
# a1 indexes into its single tuple argument; a2 unpacks that tuple in the parameter list.
a1 = lambda x: x[0] + x[1]
a2 = lambda (x0, x1): x0 + x1
print 'a1( (3,4) ) = {0}'.format( a1( (3,4) ) )
print 'a2( (3,4) ) = {0}'.format( a2( (3,4) ) )

# Two-parameter function
# b1 and b2 both add two 2-tuples position by position and return the sums as a 2-tuple.
b1 = lambda x, y: (x[0] + y[0], x[1] + y[1])
b2 = lambda (x0, x1), (y0, y1): (x0 + y0, x1 + y1)
print '\nb1( (1,2), (3,4) ) = {0}'.format( b1( (1,2), (3,4) ) )
print 'b2( (1,2), (3,4) ) = {0}'.format( b2( (1,2), (3,4) ) )

a1( (3,4) ) = 7
a2( (3,4) ) = 7

b1( (1,2), (3,4) ) = (4, 6)
b2( (1,2), (3,4) ) = (4, 6)

In [45]:
# TODO: Replace <FILL IN> with appropriate code
# Use both syntaxes to create a function that takes in a tuple of two values and swaps their order
# E.g. (1, 2) => (2, 1)
swap1 = lambda x: (x[1], x[0])
swap2 = lambda (x0, x1): (x1, x0)
print 'swap1((1, 2)) = {0}'.format(swap1((1, 2)))
print 'swap2((1, 2)) = {0}'.format(swap2((1, 2)))

# Using either syntax, create a function that takes in a tuple with three values and returns a tuple
# of (2nd value, 3rd value, 1st value).  E.g. (1, 2, 3) => (2, 3, 1)
swapOrder = lambda (x, y, z): (y, z, x)
print 'swapOrder((1, 2, 3)) = {0}'.format(swapOrder((1, 2, 3)))

# Using either syntax, create a function that takes in three tuples each with two values.  The
# function should return a tuple with the values in the first position summed and the values in the
# second position summed. E.g. (1, 2), (3, 4), (5, 6) => (1 + 3 + 5, 2 + 4 + 6) => (9, 12)
sumThree = lambda x, y, z: (x[0] + y[0] + z[0], x[1] + y[1] + z[1])
print 'sumThree((1, 2), (3, 4), (5, 6)) = {0}'.format(sumThree((1, 2), (3, 4), (5, 6)))

swap1((1, 2)) = (2, 1)
swap2((1, 2)) = (2, 1)
swapOrder((1, 2, 3)) = (2, 3, 1)
sumThree((1, 2), (3, 4), (5, 6)) = (9, 12)

In [46]:
# TEST Lambda expression arguments (4c)
Test.assertEquals(swap1((1, 2)), (2, 1), 'incorrect definition for swap1')
Test.assertEquals(swap2((1, 2)), (2, 1), 'incorrect definition for swap2')
Test.assertEquals(swapOrder((1, 2, 3)), (2, 3, 1), 'incorrect definition fo swapOrder')
Test.assertEquals(sumThree((1, 2), (3, 4), (5, 6)), (9, 12), 'incorrect definition for sumThree')

1 test passed.
1 test passed.
1 test passed.
1 test passed.


#### The following Python keywords refer to simple statements that cannot be used in a lambda expression: assert, pass, del, print, return, yield, raise, break, continue, import, global, and exec. Also, note that assignment statements (=) and augmented assignment statements (e.g. +=) cannot be used either.¶

In [47]:
# Just run this code
# This code will fail with a syntax error, as we can't use print in a lambda expression
import traceback
try:
    # Compiling this string fails: a lambda body must be a single expression,
    # and "print x" is a statement.
    exec("lambda x: print x")
except SyntaxError:
    # Catch only the error this demo is about and show its traceback; any
    # other exception is unexpected and should not be silently swallowed.
    traceback.print_exc()

Traceback (most recent call last):
File "<ipython-input-47-989250748d81>", line 5, in <module>
exec "lambda x: print x"
File "<string>", line 1
lambda x: print x
^
SyntaxError: invalid syntax


#### Note that map requires a one parameter function that returns a new value, filter requires a one parameter function that returns True or False, and reduce requires a two parameter function that combines the two parameters and returns a new value.¶

In [48]:
# Create a class to give our examples the same syntax as PySpark
class FunctionalWrapper(object):
    """Wrap a sequence so map/filter/reduce can be chained as methods.

    The wrapper exists to give plain Python data the same call syntax as
    PySpark.  Attribute access and indexing that the wrapper does not define
    itself are forwarded to the wrapped ``data`` object, so e.g. ``append``
    and ``for item in wrapper`` work when ``data`` is a list.
    """

    def __init__(self, data):
        self.data = data

    def map(self, function):
        """Call map on the items in data using the provided function"""
        # list() keeps the result a concrete list even where map() is lazy,
        # so indexing, repr and == on the returned wrapper keep working.
        return FunctionalWrapper(list(map(function, self.data)))

    def reduce(self, function):
        """Call reduce on the items in data using the provided function"""
        # functools.reduce is the same function as the Python 2 builtin and
        # is the only spelling that also exists on Python 3.
        from functools import reduce
        return reduce(function, self.data)

    def filter(self, function):
        """Call filter on the items in data using the provided function"""
        # Materialised as a list for the same reason as map().
        return FunctionalWrapper(list(filter(function, self.data)))

    def __eq__(self, other):
        return (isinstance(other, self.__class__)
                and self.__dict__ == other.__dict__)

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__; define it so the two agree.
        return not self.__eq__(other)

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails.  If 'data' itself
        # is missing (e.g. an instance built without __init__ by copy/pickle),
        # forwarding to self.data would recurse without bound, so fail cleanly.
        if name == 'data':
            raise AttributeError(name)
        return getattr(self.data, name)

    def __getitem__(self, k):
        return self.data[k]

    def __repr__(self):
        return 'FunctionalWrapper({0})'.format(repr(self.data))

    def __str__(self):
        return 'FunctionalWrapper({0})'.format(str(self.data))

In [49]:
# Map example

# Create some data
mapData = FunctionalWrapper(range(5))

# Define a function to be applied to each element
f = lambda x: x + 3

# Imperative programming: loop through and create a new object by applying f
mapResult = FunctionalWrapper([])  # Initialize the result
for element in mapData:
mapResult.append(f(element))  # Apply f and save the new value
print 'Result from for loop: {0}'.format(mapResult)

# Functional programming: use map rather than a for loop
print 'Result from map call: {0}'.format(mapData.map(f))

# Note that the results are the same but that the map function abstracts away the implementation
# and requires less code

Result from for loop: FunctionalWrapper([3, 4, 5, 6, 7])
Result from map call: FunctionalWrapper([3, 4, 5, 6, 7])

In [51]:
# TODO: Replace <FILL IN> with appropriate code
dataset = FunctionalWrapper(range(10))

# Multiply each element by 5
mapResult = dataset.map(lambda x: x * 5)
# Keep the even elements
# Note that "x % 2" evaluates to the remainder of x divided by 2
filterResult = dataset.filter(lambda x: x % 2 == 0)
# Sum the elements
reduceResult = dataset.reduce(lambda x, y: x + y)

print 'mapResult: {0}'.format(mapResult)
print '\nfilterResult: {0}'.format(filterResult)
print '\nreduceResult: {0}'.format(reduceResult)

mapResult: FunctionalWrapper([0, 5, 10, 15, 20, 25, 30, 35, 40, 45])

filterResult: FunctionalWrapper([0, 2, 4, 6, 8])

reduceResult: 45

In [52]:
# TEST Functional programming (4e)
Test.assertEquals(mapResult, FunctionalWrapper([0, 5, 10, 15, 20, 25, 30, 35, 40, 45]),
'incorrect value for mapResult')
Test.assertEquals(filterResult, FunctionalWrapper([0, 2, 4, 6, 8]),
'incorrect value for filterResult')
Test.assertEquals(reduceResult, 45, 'incorrect value for reduceResult')

1 test passed.
1 test passed.
1 test passed.


#### For this exercise, reuse your lambda expressions from (4e) but apply them to dataset in the sequence: map, filter, reduce. Note that since we are composing the operations our result will be different than in (4e). Also, we can write our operations on separate lines to improve readability.¶

In [53]:
# Example of a multi-line expression statement
# Note that placing parentheses around the expression allows it to exist on multiple lines without
# causing a syntax error.
# Adds 2 to every element of dataset, then multiplies the results together.
(dataset
.map(lambda x: x + 2)
.reduce(lambda x, y: x * y))

Out[53]:
39916800
In [54]:
# TODO: Replace <FILL IN> with appropriate code
# Multiply the elements in dataset by five, keep just the even values, and sum those values
finalSum = (dataset
.map(lambda x: x * 5)
.filter(lambda x: x % 2 == 0)
.reduce(lambda x, y: x + y))
print finalSum

100

In [55]:
# TEST Composability (4f)
Test.assertEquals(finalSum, 100, 'incorrect value for finalSum')

1 test passed.


In [56]:
# Run this code to view Criteo's agreement
# If this happens, open the webpage in a separate tab and follow the instructions from above.
from IPython.lib.display import IFrame

600, 350)

Out[56]:
In [57]:
# TODO: Replace <FILL IN> with appropriate code
# Just replace <FILL IN> with the url for dac_sample.tar.gz
import glob
import os.path
import tarfile
import urllib
import urlparse

# Paste url, url should end with: dac_sample.tar.gz

url = url.strip()
baseDir = os.path.join('data')
inputPath = os.path.join('cs190', 'dac_sample.txt')
fileName = os.path.join(baseDir, inputPath)
inputDir = os.path.split(fileName)[0]

def extractTar(check = False):
    """Extract dac_sample.txt from a local dac_sample*.tar.gz* archive.

    Searches the current working directory for files matching
    'dac_sample*.tar.gz*' and extracts the member 'dac_sample.txt' from the
    first match into the module-level ``inputDir``.

    Args:
        check: When True, probe quietly: return False without printing
            anything if no archive is found or it cannot be opened.

    Returns:
        True if dac_sample.txt was extracted, False otherwise.
    """
    # Find the zipped archive and extract the dataset
    tars = glob.glob('dac_sample*.tar.gz*')
    if check and len(tars) == 0:
        return False

    if len(tars) > 0:
        try:
            tarFile = tarfile.open(tars[0])
        except tarfile.ReadError:
            # The file matched the pattern but is not a readable archive
            # (for example an error page saved under the archive's name).
            if not check:
                print('Unable to open tar.gz file.  Check your URL.')
            return False

        try:
            tarFile.extract('dac_sample.txt', path=inputDir)
        finally:
            # Release the archive handle even if extraction fails.
            tarFile.close()
        print('Successfully extracted: dac_sample.txt')
        return True
    else:
        print('Alternatively, you can upload the dac_sample.tar.gz file to your Jupyter root ' +
              'directory')
        return False

# Get dac_sample.txt in place, trying the cheapest option first: an already
# extracted file, then an already downloaded archive, then a fresh download.
if os.path.isfile(fileName):
    print('File is already available. Nothing to do.')
elif extractTar(check = True):
    print('tar.gz file was already available.')
elif not url.endswith('dac_sample.tar.gz'):
    # Refuse to download anything that is not the expected sample archive.
    print('Check your download url.  It should end with: dac_sample.tar.gz')
else:
    # Download the file and store it in the same directory as this notebook
    try:
        urllib.urlretrieve(url, os.path.basename(urlparse.urlsplit(url).path))
    except IOError:
        print('Unable to download and store: {0}'.format(url))

    # Runs even after a failed download so extractTar() can report what to do next.
    extractTar()

Successfully extracted: dac_sample.txt

In [59]:
import os.path
baseDir = os.path.join('data')
inputPath = os.path.join('cs190', 'dac_sample.txt')
fileName = os.path.join(baseDir, inputPath)

if os.path.isfile(fileName):
rawData = (sc
.textFile(fileName, 2)
.map(lambda x: x.replace('\t', ',')))  # work with either ',' or '\t' separated data

print rawData.take(1)
rawDataCount = rawData.count()
print rawDataCount
# This line tests that the correct number of observations have been loaded
assert rawDataCount == 100000, 'incorrect count for rawData'
if rawDataCount == 100000:

[u'0,1,1,5,0,1382,4,15,2,181,1,2,,2,68fd1e64,80e26c9b,fb936136,7b4723c4,25c83c98,7e0ccccf,de7995b8,1f89b562,a73ee510,a8cd5504,b2cb9c98,37c9c164,2824a5f6,1adce6ef,8ba8b39a,891b62e7,e5ba7672,f54016b9,21ddcdc9,b1252a9d,07b5194c,,3a171ecb,c5c50484,e8b83407,9727dd16']