Find all words containing three consecutive pairs of double letters in a file of all English words located at: http://thinkpython.com/code/words.txt
Modules used: urllib
Author: Sibel Adali <adalis@rpi.edu>, Chuck Stewart <stewart@cs.rpi.edu>
Returns: All words matching condition and the count of found words
Pseudo Code:
open the file from the web with all the words in English
for each word in the file:
for all positions l in the word
if letters at positions (l and l+1) and (l+2 and l+3) and
(l+4 and l+5) are the same then
output word and increment the count
Code:
""" Find all words containing three consecutive pairs of double letters
in a file of all English words located at:
http://thinkpython.com/code/words.txt
**Modules used:** :py:mod:`urllib`
**Author**: Sibel Adali <adalis@rpi.edu>, Chuck Stewart <stewart@cs.rpi.edu>
**Returns:** All words matching condition and the count of found words
**Pseudo Code**::
open the file from the web with all the words in English
for each word in the file:
for all positions l in the word
if letters at positions (l and l+1) and (l+2 and l+3) and
(l+4 and l+5) are the same then
output word and increment the count
"""
__version__ = '1'
import urllib
def three_double(word):
    """ Tell whether the word holds three double letters in a row.

    A match is six adjacent characters that form three back-to-back
    pairs of identical letters (for example ``ookkee`` in
    ``bookkeeper``).  The comparison is case-sensitive.  Returns True
    on a match and False otherwise, including for words shorter than
    six characters.
    """
    # Only start positions that leave room for six characters are tried.
    return any(
        word[start] == word[start + 1]
        and word[start + 2] == word[start + 3]
        and word[start + 4] == word[start + 5]
        for start in range(len(word) - 5)
    )
# Comments that fit in a single line can be put in this format.
# Anything after a single pound sign is ignored.
# Main body of the program starts here
word_url = 'http://thinkpython.com/code/words.txt'
# NOTE: urllib.urlopen is the Python 2 spelling, matching the rest of
# this file.
word_file = urllib.urlopen(word_url)
count = 0
try:
    for word in word_file:
        # strip() already removes the trailing newline together with any
        # other surrounding whitespace.
        word = word.strip()
        if three_double(word):
            print(word)
            count = count + 1
finally:
    # Release the connection even if reading fails part-way through.
    word_file.close()
if count == 0:
    print('No words found')
else:
    print('%d words are found' % count)
This is a program for computing area and volume of a cylinder
Author: Sibel Adali (adalis)
Code:
"""
This is a program for computing area and volume
of a cylinder
Author: Sibel Adali (adalis)
"""
import math
def area_circle(radius):
    """ Return the area of a circle with the given radius (pi * r^2). """
    radius_squared = radius**2
    return math.pi * radius_squared
def volume_cylinder(radius, height):
    """ Return the volume of a cylinder: its base area times its height. """
    return area_circle(radius) * height
def area_cylinder(radius, height):
    """ Return the total surface area of a cylinder.

    The surface is the two circular end caps plus the side wall, whose
    area is the circumference of the base times the height.
    """
    end_caps = 2 * area_circle(radius)
    side_wall = 2 * radius * math.pi * height
    return end_caps + side_wall
def print_info(radius, height):
    """ Print the surface area, base-circle area and volume of a cylinder.

    The three report lines are framed by one blank line before and one
    after.  Nothing is returned.
    """
    x = area_cylinder(radius, height)
    print("")
    print("area of a cylinder is with dimensions %s %s is %s"
          % (radius, height, x))
    print("area of a circle with radius %s is %s"
          % (radius, area_circle(radius)))
    print("volume of a cylinder with radius %s and height %s is %s"
          % (radius, height, volume_cylinder(radius, height)))
    print("")
######################
# Main program here
######################
# Report the measurements of two sample cylinders; the arguments are
# (radius, height).
print_info(4,10)
print_info(5,10)
Illustrates the use of formatted strings and functions.
Code:
""" Illustrates the use of formatted strings and functions. """
def thankyou(giftgiver, gift, goodgift):
    """ Return a thank-you note addressed to giftgiver for gift.

    When goodgift is the string 'yes', an extra sentence saying the gift
    will get good use is inserted before the closing line; any other
    value leaves that sentence out.
    """
    opening = ("Dear %s:\n"
               " Thank you for your thoughtful gift of %s.\n"
               " It was very nice of you to think of me.\n")
    if goodgift == 'yes':
        extra = "It is going to get really good use.\n"
    else:
        extra = ""
    closing = "Very sincerely yours, Me."
    # Only the opening holds placeholders, so the whole note can be
    # assembled first and filled in with a single formatting step.
    template = opening + extra + closing
    return template % (giftgiver, gift)
# One note with the extra "good use" sentence ('yes') and one without.
print thankyou('Josh', 'Microsoft swag', 'yes')
print thankyou('Grandma', 'sweater', 'no')
Illustrates list indexing. Find the middle value in a list. The median value is the middle value in the sorted version of the list. The median function has a side effect that it changes the input list.
Code:
""" Illustrates list indexing. Find the middle value in a list.
The median value is the middle value in the sorted version of
the list. The median function has a side effect that it changes
the input list.
"""
def middlevalue(L):
    """Return the middle value in the list.

    For an odd-length list this is the element at the centre position.
    For an even-length list it is the average (a float) of the two
    central elements.  An empty list yields 0.  The list is used in its
    current order; it is neither sorted nor modified here.
    """
    # Floor division keeps idx an integer under both Python 2 and
    # Python 3 division rules, so it is always usable as an index.
    idx = len(L) // 2
    if len(L) % 2 == 1:  ## odd length list
        return L[idx]
    if len(L) >= 2:
        return (L[idx - 1] + L[idx]) / 2.
    return 0
def median(L):
    """Return the median of L: the middle value of its sorted version.

    Side effect: L is sorted in place, so the caller's list is reordered.
    """
    L.sort()
    middle = middlevalue(L)
    return middle
### Test code
# Exercise middlevalue on an empty list, two odd-length lists and one
# even-length list; each output line shows the list and its result.
mylist = []
print mylist, "middle value", middlevalue(mylist)
mylist = [1,2,3]
print mylist, "middle value", middlevalue(mylist)
mylist = [1,2,3,4,5]
print mylist, "middle value", middlevalue(mylist)
mylist = [1,2,3,4]
print mylist, "middle value", middlevalue(mylist)
Illustrates the use of tuples to return two values at once, read the values using multiple assignment.
Code:
""" Illustrates the use of tuples to return two values at once,
read the values using multiple assignment.
"""
def next_years_population(bpop, fpop):
    """ Return next year's (bunny, fox) populations as a tuple.

    bpop and fpop are this year's bunny and fox counts.  Each result is
    clamped at zero and truncated to an integer.
    """
    bunnies = (10*bpop)/(1+0.1*bpop)-0.05*bpop*fpop
    foxes = 0.4 * fpop +0.02*fpop*bpop
    # A population cannot drop below zero; fractions are discarded.
    return (int(max(0, bunnies)), int(max(0, foxes)))
# Starting populations: 100 bunnies and 5 foxes.
b = 100
f = 5
# Year 1: both returned values are unpacked with one multiple assignment.
bnext, fnext = next_years_population(b,f)
print "Next years population", bnext, fnext
# Year 2: last year's result becomes the new starting point.
b,f = bnext, fnext
bnext, fnext = next_years_population(b,f)
print "Next years population", bnext, fnext
Program for testing whether two circles intersect. Illustrates how to validate input so that no input can break the program.
Consider adding: 1. Allow floating point values (isdigit() only checks for integers) 2. Return detailed information about why the input was wrong
Code:
""" Program for testing whether two circles intersect.
Illustrates how to validate input so that no input can
break the program.
Consider adding:
1. Allow floating point values (isdigit() only checks for integers)
2. Return detailed information about why the input was wrong
"""
def distance(p1, p2):
    """ Returns the distance between two points.

    p1 and p2 are (x, y) pairs, passed positionally exactly as before.
    """
    # The pairs are unpacked here rather than in the signature: tuple
    # parameters are Python 2-only syntax.
    x1, y1 = p1
    x2, y2 = p2
    return ((x1-x2)**2 + (y1-y2)**2)**(0.5)
def overlap_circle(c1, c2):
    """ Returns True if two circles overlap or one circle is
    within the other one. False is returned otherwise.

    c1 and c2 are (x, y, r) triples: centre coordinates and radius.
    Circles that merely touch count as overlapping.
    """
    # Unpacked in the body because tuple parameters in the signature
    # are Python 2-only syntax.
    x1, y1, r1 = c1
    x2, y2, r2 = c2
    dist = distance( (x1,y1), (x2,y2) )
    # The circles meet when their centres are no further apart than
    # the sum of the two radii.
    return dist <= r1+r2
def valid_input(m):
    """ Takes as input a list of strings describing a circle as x, y, r.

    Returns True if there are at least three values, the first three
    consist of digits only (non-negative integers; whitespace around a
    value is ignored, as int() ignores it too) and the third value, the
    radius, is greater than zero.  Returns False otherwise.
    """
    if len(m) < 3:
        return False
    fields = [value.strip() for value in m[:3]]
    if not all(field.isdigit() for field in fields):
        return False
    return int(fields[2]) > 0
if __name__ == "__main__":
    ## this is the main body of the program.
    ## it is best to keep the main program part small for debugging
    line1 = raw_input('Circle 1 x,y,r ==> ')
    line2 = raw_input('Circle 2 x,y,r ==> ')
    m1 = line1.split(',')
    m2 = line2.split(',')
    if valid_input(m1) and valid_input(m2):
        # Each circle is built from its own input line (m1 / m2).
        x1,y1,r1 = int(m1[0]), int(m1[1]), int(m1[2])
        x2,y2,r2 = int(m2[0]), int(m2[1]), int(m2[2])
        if overlap_circle( (x1,y1,r1), (x2,y2,r2) ):
            print("Circles are overlapping")
        else:
            print("Circles are not overlapping")
    else:
        print("You entered bad input.")
Code:
def order_vals(x,y,z):
    """ Return the three values as a tuple in non-decreasing order.

    Uses nested comparisons: the first test settles the relative order
    of x and y, the remaining tests find where z belongs.
    """
    if x <= y:
        if y <= z:
            return (x,y,z)
        if x <= z:  ## x<=y, z<y
            return (x,z,y)
        return (z,x,y)
    ## from here on y < x
    if x <= z:
        return (y,x,z)
    if y <= z:  ## y < x, z < x
        return (y,z,x)
    return (z,y,x)
def order_vals2(x,y,z):
    """ Return the three values as a tuple in non-decreasing order.

    Flat version: each branch tests one complete ordering of the three
    values, so the branch that matches names the sorted result directly.
    """
    if x <= y <= z:
        return (x,y,z)
    elif x <= z <= y:
        # z must be checked against x as well; testing only x<=y and
        # z<=y would also match z < x and return (x,z,y) out of order.
        return (x,z,y)
    elif z <= x <= y:
        return (z,x,y)
    elif y <= x <= z:
        return (y,x,z)
    elif y <= z <= x:
        return (y,z,x)
    else:
        return (z,y,x)
if __name__ == "__main__":
    # Read three integers, one per prompt, then show them in order.
    x = int(raw_input("Val 1 ===> "))
    y = int(raw_input("Val 2 ===> "))
    z = int(raw_input("Val 3 ===> "))
    ordered = order_vals2(x,y,z)
    print(ordered)
This module illustrates checking for overlapping rectangles and checking for which semester comes first.
Code:
""" This module illustrates checking for overlapping
rectangles and checking for which semester comes first.
"""
def no_overlap_rectangle(rect1, rect2):
    """ Returns True if the two rectangles do not overlap, False if they do.

    Each rectangle is an (xa, ya, xb, yb) tuple; the comparisons assume
    xa <= xb and ya <= yb.  Rectangles that only share an edge or a
    corner count as overlapping.
    """
    # Unpacked in the body because tuple parameters in the signature
    # are Python 2-only syntax.
    x1, y1, x2, y2 = rect1
    x3, y3, x4, y4 = rect2
    # Disjoint when one rectangle lies entirely to one side of the other.
    return x4 < x1 or x2 < x3 or y4 < y1 or y2 < y3
def overlap_rectangle(rect1, rect2):
    """ Returns True if two rectangles overlap, False otherwise.

    Each rectangle is an (xa, ya, xb, yb) tuple; the comparisons assume
    xa <= xb and ya <= yb.  Rectangles that only share an edge or a
    corner count as overlapping.
    """
    # Unpacked in the body because tuple parameters in the signature
    # are Python 2-only syntax.
    x1, y1, x2, y2 = rect1
    x3, y3, x4, y4 = rect2
    # They overlap unless one lies entirely to one side of the other.
    return not (x4 < x1 or x2 < x3 or y4 < y1 or y2 < y3)
def semester_compare(sem1, sem2):
    """ Returns 1 if the first semester comes before the second,
    2 if the second semester comes before the first,
    0 if the two are equal. Spring semester comes before fall
    in the same year.

    sem1 and sem2 are (semester_name, year) pairs.  Semester names are
    compared case-insensitively.  Within one year, any first semester
    other than 'Spring' that differs from the second is reported as
    coming later (result 2).
    """
    # Unpacked in the body because tuple parameters in the signature
    # are Python 2-only syntax.
    s1, y1 = sem1
    s2, y2 = sem2
    s1 = s1.capitalize()
    s2 = s2.capitalize()
    if y1 < y2:
        return 1
    if y2 < y1:
        return 2
    ## y1 == y2 from here on
    if s1 == s2:
        return 0
    if s1 == 'Spring':
        return 1
    return 2
Illustrates the use of shortcutting in Boolean expressions. If we converted x to integer before checking if it contains a number, we would get an error.
Due to shortcutting, if x.isdigit() is false, we never run the next part that converts the string to integer.
Code:
""" Illustrates the use of shortcutting in Boolean expressions.
If we converted x to integer before checking if it contains a
number, we would get an error.
Due to shortcutting, if x.isdigit() is false, we never run
the next part that converts the string to integer.
"""
if __name__ == "__main__":
    x = raw_input("Enter a positive number ==> ")
    # int(x) is evaluated only when x.isdigit() is true, so text that
    # is not a number never reaches the conversion.
    is_positive = x.isdigit() and int(x) > 0
    if is_positive:
        print("ok")
    else:
        print("bad input")
Find all values in the list that are less than the value before.
Code:
""" Find all values in the list that are
less than the value before.
"""
co2_levels = [
    (2001, 320.03), (2003, 322.16),
    (2004, 328.07),
    (2006, 323.91), (2008, 341.47),
    (2009, 348.92),
    (2010, 357.29), (2011, 363.77),
    (2012, 361.51),
    (2013, 300.47),
]
# Compare every entry with the one right after it; stopping one short
# of the end keeps the i+1 lookup inside the list.
i = 0
while i < len(co2_levels) - 1:
    year1, val1 = co2_levels[i]
    year2, val2 = co2_levels[i + 1]
    went_down = val2 < val1
    if went_down:
        print("Went down from %s to %s" % (year1, year2))
    i += 1
Find the number of values in the list that are greater than the average value.
Code:
""" Find the number of values in the list that
are greater than the average value.
"""
co2_levels = [
    (2001, 320.03), (2003, 322.16),
    (2004, 328.07),
    (2006, 323.91), (2008, 341.47),
    (2009, 348.92),
    (2010, 357.29), (2011, 363.77),
    (2012, 361.51),
    (2013, 382.47),
]
## First pass: add up all the levels to get the average
sum_levels = 0
i = 0
while i < len(co2_levels):
    year, val = co2_levels[i]
    sum_levels += val
    i += 1
avg_value = sum_levels/len(co2_levels)
print("The average value of co2_level is %s" % (avg_value,))
## Second pass: count the levels that are above that average
count = 0
i = 0
while i < len(co2_levels):
    year, val = co2_levels[i]
    if val > avg_value:
        count += 1
    i += 1
print("The number of values greater than average is %s" % (count,))
Printing list items in backwards order.
Code:
""" Printing list items in backwards order.
"""
co2_levels = [
    (2001, 320.03), (2003, 322.16),
    (2004, 328.07),
    (2006, 323.91), (2008, 341.47),
    (2009, 348.92),
    (2010, 357.29), (2011, 363.77),
    (2012, 361.51),
    (2013, 382.47),
]
# Start at the last valid index and walk down to index 0.
i = len(co2_levels) - 1
while i >= 0:
    entry = co2_levels[i]
    print(entry)
    i -= 1
Program to convert a string containing a list of integers to a list of integers
Code:
"""Program to convert a string containing a list of
integers to a list of integers
"""
x = '1,2,3,4'
# split gives a list of strings: ['1', '2', '3', '4']
myx = x.split(',')
print(myx)
# Replace each string by its integer value, position by position.
i = 0
while i < len(myx):
    as_text = myx[i]
    myx[i] = int(as_text)
    i += 1
print(myx)
## myx = [1,2,3,4]
Print a Christmas tree –I know it is too early
Code:
""" Print a Christmas tree --I know it is too early
"""
# Rows of 1, 3, 5, 7 and 9 stars; each wider row gets one space less
# of padding so the rows stay centred.
i = 1
while (i < 11):
    # Floor division keeps the padding count an integer under both
    # Python 2 and Python 3 division rules.
    print(" "*(4-i//2) + "*"*i)
    i += 2
# The trunk: the same short row three times.
print(" ***\n" * 3)
Example illustrates how function parameters are aliases of lists. In other words, if you pass a list as an argument to a function, and then change the list in the function, the original list is also changed.
Code:
""" Example illustrates how function parameters are aliases
of lists. In other words, if you pass a list as an argument
to a function, and then change the list in the function, the
original list is also changed.
"""
def smallest_in_list(L):
    """Return the smallest item of L, or None if L is empty.

    This L will be an alias of the list used as an argument, so the
    in-place sort below also reorders the caller's list.
    """
    L.sort()
    if len(L) > 0:
        return L[0]
    # Say so explicitly: a bare ``None`` expression returns nothing by
    # itself and only worked because functions return None by default.
    return None
def worse_version_of_smallest_in_list():
    """Bad because it uses a global variable l1.
    Do not write code like this, it is a type of
    hard coding and very hard to debug.

    Sorts the global list l1 in place and returns its smallest item,
    or None if l1 is empty.
    """
    l1.sort()
    if len(l1) > 0:
        return l1[0]
    # Explicit return instead of a bare ``None`` expression.
    return None
def fine_version_of_smallest_in_list(l1):
    """Fine, because l1 now becomes a local variable.
    Function always uses the most local definition of a
    variable.

    Sorts the list passed in (in place) and returns its smallest item,
    or None if the list is empty.
    """
    l1.sort()
    if len(l1) > 0:
        return l1[0]
    # Explicit return instead of a bare ``None`` expression.
    return None
l1 = ['cat','dog', 'zebra', 'bat', 'fish', 'kangaroo', 'baluga whale']
print "before calling the function", l1
# smallest_in_list sorts its argument in place, so l1 itself is reordered.
min_val = smallest_in_list(l1)
print "The minimum value is", min_val
print "after calling the function", l1
print "the function had a side effect of changing the list."
Note that the for loop simply assigns the variable item to an element of the for loop:
l1 = ['cat','dog', 'zebra', 'bat', 'fish', 'kangaroo', 'baluga whale']
for item in l1:
print item
This for loop is equivalent to executing:
item= 'cat'
item= 'dog'
item= 'zebra'
item= 'bat'
item= 'fish'
item= 'kangaroo'
item= 'baluga whale'
Code:
""" This example illustrates the use of while and
for loops.
Note that the for loop simply assigns the
variable item to an element of the for loop::
l1 = ['cat','dog', 'zebra', 'bat', 'fish', 'kangaroo', 'baluga whale']
for item in l1:
print item
This for loop is equivalent to executing::
item= 'cat'
item= 'dog'
item= 'zebra'
item= 'bat'
item= 'fish'
item= 'kangaroo'
item= 'baluga whale'
"""
l1 = ['cat','dog', 'zebra', 'bat', 'fish', 'kangaroo', 'baluga whale']
print("First way: basic for loop")
print("list before the loop %s" % (l1,))
mystr = ""
for item in l1:
    # Rebinding item only changes the loop variable; the string stored
    # in l1 stays as it was.
    item = item.capitalize()
    print(item)
print("list after the loop, see items did not change %s" % (l1,))
print("")
##################################################
## Given the items in the list are strings, the
## assignment actually copies the items. Hence,
## the above loop will not change the items. To
## accomplish this, you need to use indexing, such
## as l1[i] = l1[i].capitalize()
## There are a number of ways you can get an index.
##################################################
print("First way, with an index that we create")
print("This is not a good way, don't do this.")
print("list before the loop %s" % (l1,))
mystr = ""
i = 0
for item in l1:
    # Writing through the index replaces the entry stored in the list.
    l1[i] = l1[i].capitalize()
    print("%d. %s" % (i + 1, l1[i]))
    i += 1
print("list after the loop, see items changed %s" % (l1,))
print("")
##################################################
## Since the while loop already creates indices
## we can just use the while loop for the same process
## as in the previous loop
##################################################
l1 = ['cat','dog', 'zebra', 'bat', 'fish', 'kangaroo', 'baluga whale']
print("Second way, use a while loop to directly create an index")
i = 0
print("list before the loop %s" % (l1,))
while i < len(l1):
    # The loop counter doubles as the list index.
    l1[i] = l1[i].capitalize()
    print("%d. %s" % (i + 1, l1[i]))
    i += 1
print("list after the loop, see items changed %s" % (l1,))
print("")
##################################################
## The third way is to create the indices using
## the range function first. The range function
## returns a list
## >>> range(len(l1))
## [0, 1, 2, 3, 4, 5, 6]
##
## for i in range(len(l1))
##
## is the same of executing the following assignments
##
## i=0
## i=1
## i=2
## i=3
## i=4
## i=5
## i=6
##
## Now, we can use these indices to access the list items
##################################################
l1 = ['cat','dog', 'zebra', 'bat', 'fish', 'kangaroo', 'baluga whale']
print("Third way, using a for loop of indices")
print("list before the loop %s" % (l1,))
for i in range(len(l1)):
    # range supplies the indices 0 .. len(l1)-1 one after another.
    l1[i] = l1[i].capitalize()
    print("%d. %s" % (i + 1, l1[i]))
print("list after the loop, see items changed %s" % (l1,))
print("")
##################################################
## The final way is to create indices and list items
## together. We have not yet seen this, but it is
## very useful.
##
## for item in enumerate(l1):
## print item
## is the same of executing the following assignments
##
##
## item = (0, 'Cat')
## item = (1, 'Dog')
## item = (2, 'Zebra')
## item = (3, 'Bat')
## item = (4, 'Fish')
## item = (5, 'Kangaroo')
## item = (6, 'Baluga whale')
##
## You get a list item and its index together.
## Given you get a tuple, you can directly access them
## using the following format:
##################################################
l1 = ['cat','dog', 'zebra', 'bat', 'fish', 'kangaroo', 'baluga whale']
print("Fourth way, using enumerate")
print("list before the loop %s" % (l1,))
for (i, val) in enumerate(l1):
    l1[i] = val.capitalize()
    # Print the updated entry, as the other three ways do; val itself
    # still holds the old, uncapitalized string.
    print("%d. %s" % (i + 1, l1[i]))
print("list after the loop, see items changed %s" % (l1,))
print("")
Example for loop using a list of tuples. Since each element is a tuple, it is a copy of the elements in the list (you cannot change individual components of tuples):
for item in co2_levels:
print item
is the same as executing:
item=(2001, 320.03)
item=(2003, 322.16)
item=(2004, 328.07)
item=(2006, 323.91)
item=(2008, 341.47)
item=(2009, 348.92)
item=(2010, 357.29)
item=(2011, 363.77)
item=(2012, 361.51)
item=(2013, 300.47)
Code:
""" Example for loop using a list of tuples. Since
each element is a tuple, it is a copy of the
elements in the list (you cannot change
individual components of tuples)::
for item in co2_levels:
print item
is the same as executing::
item=(2001, 320.03)
item=(2003, 322.16)
item=(2004, 328.07)
item=(2006, 323.91)
item=(2008, 341.47)
item=(2009, 348.92)
item=(2010, 357.29)
item=(2011, 363.77)
item=(2012, 361.51)
item=(2013, 300.47)
"""
co2_levels = [
    (2001, 320.03), (2003, 322.16),
    (2004, 328.07),
    (2006, 323.91), (2008, 341.47),
    (2009, 348.92),
    (2010, 357.29), (2011, 363.77),
    (2012, 361.51),
    (2013, 300.47),
]
## print all values greater than 350.
for item in co2_levels:
    # item is a (year, level) tuple; index 1 is the level.
    if not item[1] > 350:
        continue
    print("%s %s" % (item[0], item[1]))
Loop example with a list of lists. This is different because each element in the list is a list itself. Hence the expression:
for item in mylist:
print item
would execute:
item = [1]
item = [2]
item = [3]
At each point, item is a list. Hence, it is not a copy of the list inside mylist, but an alias.
As a result, if I change item, the original list also changes.
Code:
""" Loop example with a list of lists. This is different
because each element in the list is a list itself. Hence
the expression::
for item in mylist:
print item
would execute::
item = [1]
item = [2]
item = [3]
At each point, item is a list. Hence, it is not a copy
of the list inside mylist, but an alias.
As a result, if I change item, the original list also changes.
"""
mylist = [[1], [2], [3]]
print("before the loop %s" % (mylist,))
for item in mylist:
    # item is the inner list itself, so this assignment changes the
    # list stored inside mylist.
    item[0] = item[0] * 4
print("after the loop, see the list has changed %s" % (mylist,))
Simple example to compare the method to use loops to index lists.
Code:
""" Simple example to compare the method to use loops to
index lists.
"""
animals = ['cat', 'skunk', 'deer', 'chipmunk', 'slugs']
# while version: the index is created and advanced by hand
i = 0
while i < len(animals):
    print("%s %s" % (i, animals[i]))
    i += 1
#####
print("How does the for loop work")
# for version: range produces the same indices automatically
for i in range(len(animals)):
    print("%s %s" % (i, animals[i]))
Program to illustrate how to check if a word has two consecutive double letters. We also illustrate how to read user input repeatedly using a while loop:
read from the user a word
go through every position i in the word:
check if letters i, i+1 are the same
and if letters i+2 and i+3 are the same
Shows two example functions that implement the same check, to illustrate that any loop is exited as soon as the function executes a return.
Code:
"""
Program to illustrate how to check if a word has two consecutive
double letters. We also illustrate how to read user input
repeatedly using a while loop::
read from the user a word
go through every position i in the word:
check if letters i, i+1 are the same
and if letters i+2 and i+3 are the same
Shows two examples functions to implement the same function
to illustrate that any loop is exited as soon as the function
executes a return.
"""
def check_two_consecutive_double(iword):
    """ Example function shows how to track whether a condition is
    True, and return it.

    The flag starts out False and is switched to True whenever two
    double letters in a row are found at some position of the word.
    Every position is examined before the flag is returned.  The
    comparison is case-sensitive.
    """
    found = False
    for start in range(len(iword) - 3):
        first_pair = iword[start] == iword[start + 1]
        second_pair = iword[start + 2] == iword[start + 3]
        if first_pair and second_pair:
            found = True
    return found
def check_two_consecutive_double2(iword):
    """ Example shows how to return True as soon as the function finds
    the condition is True.

    The word is lower-cased first, so the check ignores case.  As soon
    as two double letters in a row are found the function returns True
    and the loop ends.  The final return is reached only when no
    position matched, so False is the right answer there.
    """
    lowered = iword.lower()
    pos = 0
    # The last start position that still leaves four letters to compare.
    last_start = len(lowered) - 4
    while pos <= last_start:
        if lowered[pos] == lowered[pos + 1] and \
           lowered[pos + 2] == lowered[pos + 3]:
            return True
        pos += 1
    return False
###############
## Testing the program
###############
if __name__ == '__main__':
    prompt = "Please enter a word (stop to end) ==> "
    iword = raw_input(prompt)
    # Keep asking until the user types 'stop' (in any letter case).
    while iword.lower() != 'stop':
        isdouble = check_two_consecutive_double2(iword)
        if isdouble:
            print("%s is a double consecutive letter word" % (iword,))
        else:
            print("%s is no good" % (iword,))
        iword = raw_input(prompt)
This program shows how to find all the local maxima in a list
Local maxima are all the values that are greater than the values immediately before and after them in the list (the first and last entries cannot be local maxima)
Write a function to return the list of all local maxima
Code:
"""
This program shows how to find all the local maxima in a list
Local maxima are all the values that are greater than the values
immediate before and after them in the list (first and last entries
cannot be a local maxima)
Write a function to return the list of all local maxima
"""
def local_maxima(L):
    """ Return the list of all local maxima of L, in list order.

    A local maximum is a value strictly greater than both of its
    neighbours.  The first and last elements are never considered, so
    lists with fewer than three items give an empty result.
    """
    return [L[i] for i in range(1, len(L) - 1)
            if L[i - 1] < L[i] and L[i] > L[i + 1]]
## Some reasonable test cases are here
# Each assignment replaces the previous one, so only the last list is
# actually used below; reorder these lines to try a different case.
L = [0, 2, 3, 6, 4, 8, 2, 3, 5, 1]
L = [0,1,2]
L = []
L = [0]
L = [0,1]
print "Local maxima of", L
print local_maxima(L)
Example program to find the two closest values in a list
We will revisit this program later. We are using this problem to show how to generate all possible pairs of indices from a list without repeating any pair ( so if we generate pair of indices 0,1 we will not generate 1,0 )
We also illustrate how to find the minimum of a given set of values
To find the minimum, we need to initiate a variable first before the loop. However, we must make sure that the minimum distance is actually a real distance. Hence, we use the first pair for this purpose.
This program is not robust as it fails if the list has no values in it.
Code:
"""
Example program to find the two closest values in a list
We will revisit this program later. We are using this problem
to show how to generate all possible pairs of indices from a list
without repeating any pair ( so if we generate pair of indices 0,1
we will not generate 1,0 )
We also illustrate how to find the minimum of a given set of values
To find the minimum, we need to initiate a variable first before the
loop. However, we must make sure that the minimum distance is actually
a real distance. Hence, we use the first pair for this purpose.
This program is not robust as it fails if the list has no values in it.
"""
L = [2, 40, 31, 10]
## Seed the search with the first pair so that min_dist is always a
## real distance between two values of the list.
pair = (0, 1)
min_dist = abs(L[0] - L[1])
for i in range(len(L) - 1):
    val1 = L[i]
    # j always starts after i, so each pair of indices is generated
    # only once.
    for j in range(i + 1, len(L)):
        val2 = L[j]
        print("(i,j): %d,%d values %s %s" % (i, j, val1, val2))
        new_dist = abs(val1 - val2)
        if new_dist < min_dist:
            min_dist, pair = new_dist, (i, j)
print("Closest two values are at indices %s" % (pair,))
x, y = pair
print("%s %s" % (L[x], L[y]))
Example image manipulation program. It copies all pixels from one image to another new image.
To do this, we must first create an array of pixels for each image. An array is similar to a list of lists.
Code:
"""
Example image manipulation program. It copies all pixels
from one image to another new image.
To do this, we must first create an array of pixels for each image.
An array is similar to a list of lists.
"""
from PIL import Image
def copy_image():
    """ Copy bolt.jpg pixel by pixel into a new image and display it.

    The local setting image_flip chooses how the copy is mirrored:
    'right' reverses the first pixel index, 'down' reverses the second,
    'down_right' reverses both, and any other value gives a plain copy.
    """
    im = Image.open("bolt.jpg")
    pix = im.load()  ## get an array of pixels for the image
    w, h = im.size
    newim = Image.new("RGB", (w, h), "white")
    newpix = newim.load()  ## get an array of pixels for the new image
    ## example to flip the image in different ways
    ## change this variable to try different versions.
    image_flip = 'down'
    mirror_first = image_flip in ('right', 'down_right')
    mirror_second = image_flip in ('down', 'down_right')
    for i in range(w):
        src_i = w - i - 1 if mirror_first else i
        for j in range(h):
            src_j = h - j - 1 if mirror_second else j
            newpix[i, j] = pix[src_i, src_j]
    newim.show()


if __name__ == '__main__':
    copy_image()
Example program that shows control of loops using break
Code:
""" Example program that shows control of loops
using break
"""
def print_val(val):
    """ Print val followed by its square root on one line. """
    root = val**(0.5)
    print("%s %s" % (val, root))
############### MAIN PROGRAM
if __name__ == "__main__":
    while True:  ## keeps running until break is executed
        val = raw_input("Please enter a number (-1 to stop) ==> ")
        if val == '-1':
            break
        if val.isdigit():
            ## do something, this function is a place holder
            print_val(int(val))
        else:
            print("Please enter a number")
    print("Finished the main loop")
Example program for controlling loops using break
Note that the break exits from the inner most loop, but not the upper loop!
Code:
""" Example program for controlling loops using break
Note that the break exits from the inner most loop,
but not the upper loop!
"""
if __name__ == "__main__":
    for i in range(4):
        print(i)
        for j in range(4):
            if i < j:
                print("BREAK")
                # Leaves only the inner j loop; the outer loop then
                # carries on with the next i.
                break
            print("%s %s" % (i, j))
Example program illustrates the use of break and continue to control how a loop executes
Read input until user types -1, give warning for bad input (not number) Report the average, min, max of all the numbers user entered
Code:
""" Example program illustrates the use of break and continue to
control how a loop executes
Read input until user types -1, give warning for bad input (not number)
Report the average, min, max of all the numbers user entered
"""
if __name__ == "__main__":
    user_inputs = []
    while True:
        print(user_inputs)
        val = raw_input("Enter a number (-1 to stop) ==> ")
        if val == '-1':
            break  ## exit the loop when -1 is entered
        if not val.isdigit():
            print("Please enter a number")
            continue  ## skip the remaining part of the loop for this input
        ## val is not -1 and val is digit
        val = int(val)
        user_inputs.append(val)
    print("Your statistics")
    if user_inputs:
        print("Min: %d, Max: %d, Average: %f"
              % (min(user_inputs), max(user_inputs),
                 float(sum(user_inputs))/len(user_inputs)))
    else:
        # With no numbers entered there is nothing to take the min, max
        # or average of; report that instead of failing.
        print("No numbers were entered")
Simple program to illustrate writing into a file
Open a file Write multiple lines Close the file
Code:
""" Simple program to illustrate writing into a file
Open a file
Write multiple lines
Close the file
"""
if __name__ == "__main__":
    f = open('myfile.txt', 'w')
    for i in range(1, 11):
        ## each line must end with a newline
        f.write("%d\n" % i)
    ## when writing to a file, the close function makes sure that
    ## the file is properly saved
    f.close()
Example of file parsing, reads a file of the form:
lego type, lego number 2x2,1 2x1,5
in which each line is a lego type and a given count, with a header in the first line.
Example file for this program can be found at: http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/
Code:
""" Example of file parsing, reads a file of the form:
lego type, lego number
2x2,1
2x1,5
in which each line is a lego type and a given count, with a header
in the first line.
Example file for this program can be found at:
http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/
"""
if __name__ == "__main__":
    legos = []
    # The with block closes the file even if a line fails to parse.
    with open('legos.txt') as f:
        ## i counts the lines of the file, starting at 1
        for i, line in enumerate(f, 1):
            if i == 1:
                continue  ## skip the header line
            if not line.strip():
                continue  ## ignore blank lines, e.g. at the end of the file
            print(i)  ## debugging code
            lego_line = (line.strip()).split(",")
            lego_type = lego_line[0].strip()
            lego_number = int(lego_line[1])
            ## append the lego information: one entry per piece
            legos.extend([lego_type] * lego_number)
    print(legos)
We will process yelp data from Lab 4
Example file for this program can be found at: http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/
Code:
""" Example of parsing a file that has
- data on each line (no header)
- but undetermined length (the last entry, number of reviews, may vary)
We will process yelp data from Lab 4
Example file for this program can be found at:
http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/
"""
if __name__ == "__main__":
    count = 0
    # The with block closes the file once all lines have been read.
    with open('yelp.txt') as f:
        for line in f:  ## Read each line
            count += 1
            m = line.strip().split("|")
            ## everything from the seventh field on is a review score
            reviews = [int(review) for review in m[6:]]
            ## print information for each business
            if reviews:
                print("%s: Avg review: %.2f"
                      % (m[0], sum(reviews)/float(len(reviews))))
            else:
                # No scores on this line: an average would divide by zero.
                print("%s: No reviews" % (m[0],))
            ##if count > 10: ##debugging code to test the first 10 lines
            ##    break
    print("Number of businesses %s" % (count,))
Program to parse regular data Each line is a row of data
Each line is delimited by some character
For example: CSV means comma separated values
This program reads a county name and finds the names of all the Farmer’s markets in that county, and also prints the number
Example file for this program can be found at: http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/
Code:
"""
Program to parse regular data
Each line is a row of data
Each line is delimited by some character
For example: CSV means comma separated values
Read the file line by line
Extract information from it
This program reads a county name and finds the names of all the
Farmer's markets in that county, and also prints the number
Example file for this program can be found at:
http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/
"""
def parse_line(line):
    """ Split one comma-separated line into a list of its fields.

    Whitespace (including the newline) is removed from both ends of
    the line first; the individual fields are not stripped.
    """
    return line.strip().split(',')
def parse_file(fname, county):
    """ Print the second field of every row whose first field is county.

    fname names a comma-separated file whose first line is a header.
    After the matching rows, the number of matches is printed.
    """
    cnt = 0
    # The with block closes the file once all rows have been read.
    with open(fname) as f:
        f.readline()  ## the first line is header, we are skipping it
        for line in f:
            info = parse_line(line)
            if info[0] == county:
                print(info[1])
                cnt += 1
    print("Found %s markets" % (cnt,))
######
if __name__ == "__main__":
    # Ask which county to look for, then scan the markets file for it.
    county = raw_input("Please enter a county name ==> ")
    parse_file('fm.csv', county)
Program to parse regular data Each line is a row of data
Each line is delimited by some character
For example: CSV means comma separated values
This program shows how to parse consecutive blocks of data The file is sorted by the county, so when we read a new county name we know that we started processing a new county.
Algorithm:
Skip the header Read the first line ##note assumes such a line exists Find the name of the county Read the rest of the file line by line
Extract information from it If it is the same county as the previous line, then
increment the count of markets for this county
- Else ## we have a new county
- store the current county and
- the number of markets in that county in a list
start a new county with count=1
When finished, we have to add the last county we were counting to the list We can now sort this list to find
the top 3 or 10 counties with the highest number of markets
Example file for this program can be found at: http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/
Code:
"""
Program to parse regular data
Each line is a row of data
Each line is delimited by some character
For example: CSV means comma separated values
This program shows how to parse consecutive blocks of data
The file is sorted by the county, so when we read a new county name
we know that we started processing a new county.
Algorithm:
Skip the header
Read the first line ##note assumes such a line exists
Find the name of the county
Read the rest of the file line by line
Extract information from it
If it is the same county as the previous line, then
increment the count of markets for this county
Else ## we have a new county
store the current county and
the number of markets in that county in a list
start a new county with count=1
When finished, we have to add the last county we were counting to the list
We can now sort this list to find
the top 3 or 10 counties with the highest number of markets
Example file for this program can be found at:
http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/
"""
def parse_line(line):
    """ Split one comma-delimited row of the data file into its fields.

    Surrounding whitespace (including the trailing newline) is removed
    before splitting. Returns the list of raw string fields.
    """
    return line.strip().split(',')
def parse_file(fname):
    """ Count the markets in each county and print the 10 largest counts.

    The file is expected to start with one header line and to be sorted
    by county (the first field of every row), so a change in the county
    name marks the start of a new block of rows.

    fname -- path of the comma-separated data file.

    Prints one "County: <name>, number: <count>" line per county, highest
    count first, for at most 10 counties. Nothing is printed when the
    file has no data rows. Returns None.
    """
    markets = []  ## (count, county) tuples, to be used for sorting
    county = None  ## county currently being counted; None before any row
    cnt = 0
    with open(fname) as f:  ## the file is closed even if parsing fails
        f.readline()  ## skip the header line
        for line in f:
            if not line.strip():
                continue  ## a blank line is not a county
            name = parse_line(line)[0]
            if name == county:  ## same as previous county
                cnt += 1
            else:  ## new county found
                if county is not None:
                    markets.append((cnt, county))  ## store the old county
                county = name
                cnt = 1
    if county is not None:
        markets.append((cnt, county))  ## the last county needs to be added

    ## sort the list of tuples (by the count first, and then by the name)
    markets.sort(reverse=True)
    ## slicing keeps this safe when there are fewer than 10 counties
    for cnt, county in markets[:10]:
        print("County: %s, number: %d" % (county, cnt))
## ------------------------------------------------------------
## Main program: report the counties with the most rows in fm.csv
## ------------------------------------------------------------
if __name__ == "__main__":
    parse_file('fm.csv')
Illustrates the use of HTML files over the web in programs
Files from the web are opened using the urllib library
After this, the file is processed in the same way as a file on your own hard disk.
Note we are using different methods to read the files but they are all equivalent and are used to illustrate the different methods
Function:
Returns True if the word is a palindrome, the word is the same when read forward and backwards. It returns False otherwise.
Code:
""" Illustrates the use of HTML files over the web in programs
Files from the web are opened using the urllib library
After this, the file is processed in the same way as a file
on your own hard disk.
Note we are using different methods to read the files
but they are all equivalent and are used to illustrate the
different methods
"""
import urllib
def is_palindrome(word):
    """ Tell whether a word reads the same forwards and backwards.

    Surrounding whitespace is ignored and the comparison is
    case-insensitive. Returns True for a palindrome (the empty string
    included) and False otherwise.
    """
    cleaned = word.strip().lower()
    ## a palindrome is equal to its own reversal
    return cleaned == cleaned[::-1]
###########
## Main program: print every palindrome in the online word list
###########
if __name__ == "__main__":
    word_url = 'http://thinkpython.com/code/words.txt'
    word_file = urllib.urlopen(word_url)
    try:
        ## readline() returns '' only at the end of the file, so it is
        ## used as the sentinel that ends the iteration
        for word in iter(word_file.readline, ''):
            if is_palindrome(word):
                print(word.strip())
    finally:
        word_file.close()  ## release the connection even after an error
We are trying to find the number of unique actors in the database This solution is O(N^2) operations
Example file for this program can be found at:
Code:
"""
We are trying to find the number of unique actors in the database
This solution is O(N^2) operations
Example file for this program can be found at:
http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/
"""
if __name__ == "__main__":
    count = 0    ## number of actor|movie lines read
    actors = []  ## unique actor names, in order of first appearance
    with open('imdb_data.txt') as imdb:  ## closed automatically
        for line in imdb:
            m = line.strip().split('|')
            name = m[0].strip()
            movie = m[1].strip()
            ## The membership test below scans the list: an O(N) operation
            ## (the append itself is cheap)
            if name not in actors:
                actors.append(name)
            count += 1

    ## Since we repeat the loop O(N) times and each time
    ## we conduct an O(N) operation, the total complexity is
    ## O(N*N=N^2)
    print("Total %s movies" % count)
    print("Total %s actors" % len(actors))
We are trying to find the number of unique actors in the database This solution is O(N) operations
Example file for this program can be found at:
Code:
"""
We are trying to find the number of unique actors in the database
This solution is O(N) operations
Example file for this program can be found at:
http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/
"""
if __name__ == "__main__":
    count = 0       ## number of actor|movie lines read
    actors = set()  ## unique actor names
    with open('hanks.txt') as data:  ## closed automatically
        for line in data:
            m = line.strip().split('|')
            name = m[0].strip()
            movie = m[1].strip()
            actors.add(name)  ## This is an O(1) operation
            count += 1

    ## The above loop is repeated O(N) times, hence
    ## total complexity of this operation is O(1*N)=O(N)
    print("Total %s movies" % count)
    print("Total %s actors" % len(actors))
    for actor in sorted(actors):
        print(actor)
We are trying to find the number of unique actors in the database This solution is O(N) operations
Example file for this program can be found at:
Code:
"""
We are trying to find the number of unique actors in the database
This solution is O(N) operations
Example file for this program can be found at:
http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/
"""
if __name__ == "__main__":
    actors = []  ## one entry per line read, duplicates included
    with open('imdb_data.txt') as imdb:  ## closed automatically
        for line in imdb:
            m = line.strip().split('|')
            name = m[0].strip()
            movie = m[1].strip()
            actors.append(name)  ## each append operation is O(1)

    ## so far O(N)
    actorset = set(actors)  ## this conversion is also O(N)
    print("Total %s actors" % len(actorset))
Find the movies common between George Clooney, Catherine Zeta-Jones and Brad Pitt
Illustrates the use of sets to answer complex queries
Example file for this program can be found at:
Code:
"""
Find the movies common between George Clooney,
Catherine Zeta-Jones and Brad Pitt
Illustrates the use of sets to answer complex queries
Example file for this program can be found at:
http://www.cs.rpi.edu//~sibel/csci1100/fall2014/files/
"""
def common_movies(name1, movies1, name2, movies2):
    """ Print the movies that appear in both movie sets.

    name1, name2     -- labels used in the heading line
    movies1, movies2 -- sets of movie titles

    Prints a heading, the number of shared titles, each shared title on
    its own line (in set iteration order) and a closing blank line.
    """
    shared = movies1.intersection(movies2)
    print("Movies with %s and %s:" % (name1, name2))
    print("Total: %s" % len(shared))
    for title in shared:
        print(title)
    print("")  ## blank separator line
if __name__ == "__main__":
    movies1 = set()  ## all movies of george clooney
    movies2 = set()  ## all movies of catherine zeta jones
    movies3 = set()  ## all movies of brad pitt

    with open('imdb_data.txt') as imdb:  ## closed automatically
        for line in imdb:
            m = line.strip().split('|')
            name = m[0].strip()
            movie = m[1].strip()
            ## a line names exactly one actor, so the tests are exclusive
            if name == 'Clooney, George':
                movies1.add(movie)
            elif name == 'Zeta-Jones, Catherine':
                movies2.add(movie)
            elif name == 'Pitt, Brad':
                movies3.add(movie)

    print("Number of movies for Clooney %s" % len(movies1))
    print("Number of movies for Zeta %s" % len(movies2))
    print("Number of movies for Pitt %s" % len(movies3))

    common_movies("Clooney", movies1, "Zeta", movies2)
    common_movies("Clooney", movies1, "Pitt", movies3)
    common_movies("Pitt", movies3, "Zeta", movies2)

    print("Movies with all three together:")
    print(movies1 & movies2 & movies3)  ## intersection of all three sets
    print("")

    print("Movies with Clooney and Pitt, but not Zeta")
    print((movies1 & movies3) - movies2)  ## set difference removes Zeta's
    print("")
Automated solver for easy sudoku puzzles by finding all possible values for a given location using sets
If a location has only a single possible value, that value can be safely placed there.
The main algorithm:
Code:
"""
Automated solver for easy sudoku puzzles by finding all
possible values for a given location using sets
If a location has only a single possible value, that value can be safely placed there.
The main algorithm:
while there is an empty location with only a single possible value
add the value to the board
"""
import lab06_util
def print_board(board):
    """ Print a 9x9 board of one-character strings.

    A horizontal rule is drawn above every third row and after the last
    row; a vertical bar is drawn before every third column and at the
    end of each row.
    """
    border = " " + "-" * 29 + " "
    for r in range(9):
        if r % 3 == 0:
            print(border)
        pieces = []
        for c in range(9):
            if c % 3 == 0:
                pieces.append("|")
            pieces.append(" " + board[r][c] + " ")
        print("".join(pieces) + "|")
    print(border)
def values_present(board, row, col):
    """ Collect the values that share a row, column or 3x3 grid with a cell.

    board    -- 9x9 list of lists of one-character strings
    row, col -- indices (0-8) of the cell being examined

    Returns a set with every value found in the same row, the same
    column and the same 3x3 grid, excluding the cell itself. The set
    contains whatever the board stores in those cells, so the marker
    used for empty cells may be part of it.
    """
    values = set()
    ## top-left corner of the enclosing 3x3 grid; floor division is
    ## spelled out so the result is an integer under any division rules
    gridx = 3 * (row // 3)
    gridy = 3 * (col // 3)
    for i in range(9):
        if i != col:
            values.add(board[row][i])  ## rest of the row
        if i != row:
            values.add(board[i][col])  ## rest of the column
    for i in range(gridx, gridx + 3):
        for j in range(gridy, gridy + 3):
            ## grid cells in the same row or column were already
            ## visited by the loop above
            if i != row and j != col:
                values.add(board[i][j])
    return values
def values_possible(board, row, col):
    """ Return the set of values that may still be placed at a location.

    Starts from the digits '1' through '9' (as strings) and removes every
    value already present in the same row, column and 3x3 grid.
    """
    all_digits = set(str(d) for d in range(1, 10))
    return all_digits - values_present(board, row, col)
def find_possible_location(board):
    """ Find an empty cell that admits exactly one value.

    Cells are scanned row by row; an empty cell is one holding '.'.
    Returns (row, column, value) for the first such cell, or
    (None, None, None) when no empty cell has a single candidate.
    """
    for r in range(9):
        for c in range(9):
            if board[r][c] != '.':
                continue  ## already filled in
            candidates = values_possible(board, r, c)
            if len(candidates) == 1:
                return r, c, candidates.pop()
    return None, None, None
### Main body of the program
### Read the board, then repeatedly fill in a cell that has a single
### possible value, until no such cell exists (either the board is
### solved or no location has a single possible value).
if __name__ == "__main__":
    board = lab06_util.read_sudoku('easy.txt')

    print_board(board)
    i, j, val = find_possible_location(board)
    while i is not None:
        print("Possible at %s %s" % (i, j))
        print("Value %s" % val)
        board[i][j] = val
        raw_input()  ## wait for Enter before showing the next step
        print_board(board)
        i, j, val = find_possible_location(board)
Naive solution for finding the number of movies for an actor
Use a list of lists: [ [actorname, num movies], ... ]
This is an O(N^2) solution, so not recommended. Dictionaries are made for this type of problems.
Code:
"""
Naive solution for finding the number of movies for an actor
Use a list of lists: [ [actorname, num movies], ... ]
For each movie read:
Find the index in this list for this actor
Add 1 to the number of movies
This is an O(N^2) solution, so not recommended. Dictionaries
are made for this type of problems.
"""
def find_actor(actorlist, actor):
    """ Locate an actor in a list of [name, count] lists.

    Returns the index of the first entry whose name equals actor,
    or None when the actor is not in the list.
    """
    for position, entry in enumerate(actorlist):
        if entry[0] == actor:
            return position
    return None
if __name__ == "__main__":
    actorlist = []  ## [name, num_movies]
    with open('hanks.txt') as data:  ## closed automatically
        for line in data:
            m = line.strip().split('|')
            name = m[0].strip()
            movie = m[1].strip()
            i = find_actor(actorlist, name)  ## linear search: O(N)
            if i is None:  ## actor is not in the list yet
                actorlist.append([name, 1])
            else:
                actorlist[i][1] += 1
    print(actorlist)
Second naive solution for finding the number of movies for an actor
Use a list of actor names, the actor name is repeated once for each movie they are in:
actors_list For each movie read:
Add the actor name to the list
Sort actors_list ## sorting is O(N log N) as we will see, better than O(N^2)
start with the first actor and count = 1 Go through each name in actors_list (starting at the second location):
- if the name is same as the previous one:
- add 1 to the number of movies
- else:
- print the previous count (or add to a list) start a new count for the given actor with count = 1
print the last actor (or add to a list)
This is an O(N log N) solution, better than the previous one, but still dictionary solution (next one) is much faster.
Code:
"""
Second naive solution for finding the number of movies for an actor
Use a list of actor names, the actor name is repeated once for each
movie they are in:
actors_list
For each movie read:
Add the actor name to the list
Sort actors_list ## sorting is O(N log N) as we will see, better than O(N^2)
start with the first actor and count = 1
Go through each name in actors_list (starting at the second location):
if the name is same as the previous one:
add 1 to the number of movies
else:
print the previous count (or add to a list)
start a new count for the given actor with count = 1
print the last actor (or add to a list)
This is an O(N log N) solution, better than the previous one, but
still dictionary solution (next one) is much faster.
"""
if __name__ == "__main__":
    actors = []  ## one actor name per line of the file
    with open('hanks.txt') as data:  ## closed automatically
        for line in data:
            m = line.strip().split('|')
            name = m[0].strip()
            actors.append(name)

    actors.sort()  ## N log N; equal names are now adjacent

    actorlist = []  ## [name, num_movies] for each distinct actor
    if actors:  ## an empty file has nothing to count
        current_actor = actors[0]
        num_movies = 1
        for i in range(1, len(actors)):
            if current_actor == actors[i]:
                num_movies += 1
            else:
                actorlist.append([current_actor, num_movies])
                current_actor = actors[i]
                num_movies = 1
        ## the last actor being counted still has to be stored
        actorlist.append([current_actor, num_movies])

    print(actorlist)
Simple example of dictionary methods
Code:
""" Simple example of dictionary methods """
animals = {
    'tiger': 91.0,
    'belgian horse': 162.6,
    'indian elephant': 280.0,
    'lion': 97.0,
}

print(animals)                 ## the full dictionary
print(animals.keys())          ## the keys of the dictionary
print(sorted(animals.keys()))  ## the keys as a sorted list
print(sorted(animals))         ## identical to sorted(animals.keys())
print(animals.values())        ## all the values in the dictionary

## print all key/value pairs
print("")
for key in animals:
    print("%s %s" % (key, animals[key]))

## print all key/value pairs again; the result is the same as above.
## Iterating over the dictionary directly (as above) is the better
## method, since it needs no separate call to fetch the keys.
print("")
for key in animals.keys():
    print("%s %s" % (key, animals[key]))
Dictionary based solution for finding the number of movies for an actor Then finding the actors with the highest number of movies This is an O(N) solution to compute the number of movies for each actor
## to find the top actors numlist = [] For each actor:
append [number of movies, actor] to numlist
sort numlist in reverse print top 3 values from numlist
Code:
"""
Dictionary based solution for finding the number of movies for an actor
Then finding the actors with the highest number of movies
This is an O(N) solution to compute the number of movies for each actor
actors is a dictionary
key: actor name
value: the set of movies the actor is in
For each actor, movie in the file:
if dictionary actors has the actor as a key:
add 1 to the number of movies for this actor
else:
add the actor to the actors dictionary as a key
with count 1 as value
## to find the top actors
numlist = []
For each actor:
append [number of movies, actor] to numlist
sort numlist in reverse
print top 3 values from numlist
"""
if __name__ == "__main__":
    actors = {}  ### key: actor name, value: number of movies
    with open('imdb_data.txt') as imdb:  ## closed automatically
        for line in imdb:
            m = line.strip().split('|')
            name = m[0].strip()
            ## get() supplies 0 for a name we have not yet seen
            actors[name] = actors.get(name, 0) + 1

    ## now we will compute the actors with the highest number of movies
    nummovie_list = []  ## [ [num_movies, actor name], .. ]
    for name in actors:  ## for each key in actors
        nummovie_list.append([actors[name], name])

    nummovie_list.sort(reverse=True)  ## highest value first
    ## ties on the count are broken by name, also in reverse order

    ## print the top 20 values; slicing copes with fewer than 20 actors
    for num_movies, name in nummovie_list[:20]:
        print("%s (%d)" % (name, num_movies))
Final dictionary example for movies
The program then asks repeatedly for the name of an actor and prints the set of movies for that actor
Code:
"""
Final dictionary example for movies
It constructs a dictionary with
key: actor name
value: the set of movies the actor is in
The program then asks repeatedly for the name of an actor
and prints the set of movies for that actor
"""
if __name__ == "__main__":
    actors = {}  ### key: actor name, value: set of movies
    with open('hanks.txt') as data:  ## closed automatically
        for line in data:
            m = line.strip().split('|')
            name = m[0].strip()
            movie = m[1].strip()
            if name in actors:
                ## checks if name is in the set of keys for this dictionary
                actors[name].add(movie)
            else:
                ## new name, initialize with a set containing this movie
                actors[name] = set([movie])

    while True:  ## ask for user input repeatedly
        name = raw_input('Give me an actor (-1 to stop) ==> ')
        if name == '-1':
            break
        if name not in actors:
            print("I do not have this actor")
        else:
            print("Movies for this actor:")
            ## remember: actors[name] is a set, so we can iterate over it
            for movie in actors[name]:
                print("\t" + movie)
Class for creating and manipulating 2-dimensional points
Methods:
Method to initialize. x=0,y=0 provides default values. Example calls:
x = Point2d(5,10)
x = Point2d() ## same as x = Point2d(0,0)
Called when checking if two points are equal: pt1 == pt2. Returns a Boolean
Code:
"""
Class for creating and manipulating 2-dimensional points
"""
import math
class Point2d(object):
    """ A mutable point in the plane with coordinates x and y. """

    def __init__(self, x0=0, y0=0):
        """ Method to initialize. x0=0, y0=0 provide default values.

        Example calls::

            x = Point2d(5,10)
            x = Point2d()      ## same as x = Point2d(0,0)
        """
        self.x = x0
        self.y = y0

    def __str__(self):
        """ Return the point as '(x, y)'; %d formats both as integers. """
        return '(%d, %d)' % (self.x, self.y)

    def scale(self, c):
        """ Multiply both coordinates by c, in place. Returns None. """
        self.x *= c
        self.y *= c

    def magnitude(self):
        """ Return the distance of the point from the origin. """
        return math.sqrt(self.x**2 + self.y**2)

    def distance(self, other):
        """ Return the straight-line distance to another point. """
        dx = self.x - other.x
        dy = self.y - other.y
        return math.sqrt(dx**2 + dy**2)

    def grid_distance(self, other):
        """ Return the grid distance |dx| + |dy| to another point. """
        dx = self.x - other.x
        dy = self.y - other.y
        return abs(dx) + abs(dy)

    def copy(self):
        """ Return a new object identical to the current one. """
        return Point2d(self.x, self.y)

    def __add__(self, other):
        """ Called when adding two points: pt1 + pt2, returns a new object """
        return Point2d(self.x + other.x, self.y + other.y)

    def __sub__(self, other):
        """ Called when subtracting two points: pt1 - pt2, returns a new object """
        return Point2d(self.x - other.x, self.y - other.y)

    def __eq__(self, other):
        """ Called when checking if two points are equal: pt1 == pt2.

        Returns a Boolean for two points. For any other kind of object
        NotImplemented is returned, so the comparison evaluates to False
        instead of failing on a missing attribute.
        """
        if not isinstance(other, Point2d):
            return NotImplemented
        return self.x == other.x and self.y == other.y

    def __ne__(self, other):
        """ Called for pt1 != pt2; always the opposite of pt1 == pt2.

        Python 2 does not derive != from __eq__, so without this method
        two equal points would still compare as different.
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def move(self, command):
        """ Move the point one unit in the given direction, in place.

        command -- 'up', 'down', 'left' or 'right' (any letter case);
                   any other command leaves the point where it is.
        """
        command = command.lower()
        if command == 'up':
            self.y += 1
        elif command == 'down':
            self.y -= 1
        elif command == 'left':
            self.x -= 1
        elif command == 'right':
            self.x += 1
if __name__ == '__main__':
    ### first let us test all the implemented methods
    pt1 = Point2d(5, 10)   ## calls __init__
    pt2 = Point2d(10, 20)  ## calls __init__
    print("%s %s" % (pt1, pt2))  ## calls __str__

    pt1.scale(10)        ## a method that returns no value is called like this
    m = pt1.magnitude()  ## a method that returns a value but takes no arguments
    print(m)

    d = pt1.distance(pt2)   ## distance between two points,
    d2 = pt2.distance(pt1)  ## which is returned as a value
    print("%s %s" % (d, d2))  ## both ways of calling should agree

    pt3 = pt1 + pt2  ## calls __add__
    print(pt3)
    pt3 = pt1 - pt2  ## calls __sub__
    print(pt3)

    pt4 = pt1.copy()
    print(pt4)
    ## the next two lines call __eq__
    print("%s %s" % (pt4 == pt1, '(True if copy works)'))
    print("%s %s" % (pt1 == pt2, '(should be False)'))

    ## Let us use the points to solve a previous homework
    print("")
    print('HW solution')
    pt1 = Point2d(5, 10)
    cmd1 = ['up', 'down', 'left']
    pt2 = Point2d(15, 3)
    cmd2 = ['right', 'stay', 'down']
    print("Wallace at: %s Gromit at: %s" % (pt1, pt2))
    ## apply one command to each point per step
    for i in range(len(cmd1)):
        first_cmd, second_cmd = cmd1[i], cmd2[i]
        pt1.move(first_cmd)
        pt2.move(second_cmd)
        print("%s %s %s" % (pt1, pt2, pt1.grid_distance(pt2)))
Methods:
Code:
class Time(object):
    """ A time of day, stored as the number of seconds since midnight. """

    def __init__(self, h, m, s):
        """ Store time internally as seconds """
        self.sec = s + m*60 + h*60*60

    def __str__(self):
        """ Print time externally as military time (HH:MM:SS) """
        return '%02d:%02d:%02d' % self.convert()

    def convert(self):
        """ Convert time to its input form: the tuple (h, m, s) """
        ## floor division keeps hours and minutes whole numbers
        h = self.sec // 3600
        m = (self.sec - h*3600) // 60
        s = self.sec - h*3600 - m*60
        return (h, m, s)

    def __add__(self, other):
        """ Take in time object, add to self, return new time object.

        The sum wraps around at midnight (86400 seconds), so
        23:00:00 + 02:00:00 gives 01:00:00.
        """
        ## the wrap has to be applied to the seconds, not to the object
        total = (self.sec + other.sec) % 86400
        return Time(0, 0, total)

    def __sub__(self, other):
        """ Take in time object, subtract from self, return new time object.

        A negative difference is clamped to 00:00:00.
        """
        remaining = self.sec - other.sec
        if remaining < 0:
            remaining = 0
        return Time(0, 0, remaining)

    def am_or_pm(self):
        """ Is Time before or after 12:00:00? Returns "AM" or "PM". """
        if self.sec < 43200:  ## 43200 seconds = 12 hours
            return "AM"
        else:
            return "PM"
if __name__ == "__main__":
    time1 = Time(5, 5, 5)
    time2 = Time(12, 0, 0)

    total = time1 + time2       ## calls __add__
    difference = time2 - time1  ## calls __sub__
    print(str(total))
    print(str(difference))

    print(time2.am_or_pm())
    print(time1.am_or_pm())
Address class. Could be expanded to separate out details (such as city, state, zip, etc.) to be returned individually
Methods:
Code:
"""
Address class. Could be expanded to separate out details (such as city,
state, zip, etc.) to be returned individually
"""
class Address(object):
    """ Holds a complete address as one piece of text. """

    def __init__(self, address):
        """ Remember the address exactly as it was given. """
        self.address = address

    def __str__(self):
        """ Return the stored address. """
        return self.address
Restaurant class, uses the address class
Methods:
create new Restaurant object
Code:
"""
Restaurant class, uses the address class
"""
from Address import *
class Restaurant(object):
    """ A restaurant with a location, an Address, a url, a category
    and a list of numeric ratings.
    """

    def __init__(self, name, latitude, longitude, address, url, category, ratings=None):
        """ create new Restaurant object

        address is wrapped in an Address object. ratings is the list of
        ratings; when it is omitted every restaurant gets its own new
        empty list, so ratings are never shared between restaurants.
        """
        self.name = name
        self.latitude = latitude
        self.longitude = longitude
        self.address = Address(address)
        self.url = url
        self.category = category
        self.ratings = [] if ratings is None else ratings

    def __str__(self):
        """ create string of restaurant: the name followed by the address """
        return self.name + str(self.address)

    def average_rating(self):
        """ return average rating for restaurant, or -1 if it has no ratings

        The average is computed with true division, so integer ratings
        such as [3, 4] give 3.5 rather than a truncated 3.
        """
        if len(self.ratings) == 0:
            return -1
        return float(sum(self.ratings)) / len(self.ratings)

    def max_rating(self):
        """ return max rating for restaurant if it exists, -1 otherwise """
        if len(self.ratings) == 0:
            return -1
        return max(self.ratings)

    def min_rating(self):
        """ return min rating for restaurant if it exists, -1 otherwise """
        if len(self.ratings) == 0:
            return -1
        return min(self.ratings)
Module for computing the Haversine distance between two latitude and longitude coordinates in miles
Code:
"""
Module for computing the Haversine distance between two
latitude and longitude coordinates in miles
"""
import math
def distance_from_lat_long(lat1, long1, lat2, long2):
    """ Return the Haversine distance in miles between two coordinates.

    lat1, long1 -- latitude and longitude of the first point, in degrees
    lat2, long2 -- latitude and longitude of the second point, in degrees
    """
    # The trigonometric functions expect radians
    deg_to_rad = math.pi / 180.0
    phi1 = lat1 * deg_to_rad
    lam1 = long1 * deg_to_rad
    phi2 = lat2 * deg_to_rad
    lam2 = long2 * deg_to_rad

    # Haversine formula
    half_dlat = (phi1 - phi2) / 2
    half_dlong = (lam1 - lam2) / 2
    a = math.sin(half_dlat)**2 + math.cos(phi1) * math.cos(phi2) * math.sin(half_dlong)**2
    central_angle = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    # 6371 divided by 1.609 (kilometers per mile) is the radius in miles
    radius_miles = 6371 / 1.609
    return radius_miles * central_angle
Shows the use of the Restaurant class in finding restaurants with a given criteria
Code:
"""
Shows the use of the Restaurant class in finding
restaurants with a given criteria
"""
from Restaurant import Restaurant
from lat_long_to_dist import *
def convert_input_to_restaurant(line):
    """ Build a Restaurant from a single '|'-delimited line of the yelp file.

    The first six fields are passed to Restaurant in order (name,
    latitude, longitude, address, url, category), with the two
    coordinates converted to floats and every '+' in the address
    replaced by a newline. All remaining fields are converted to
    integers and passed as the list of ratings.
    """
    fields = line.strip('\n').split('|')
    scores = [int(raw_score) for raw_score in fields[6:]]
    return Restaurant(fields[0],
                      float(fields[1]),
                      float(fields[2]),
                      fields[3].replace("+", "\n"),
                      fields[4],
                      fields[5],
                      scores)
def build_restaurant_list(filename):
    """ Parse the given file containing yelp data and return a list of
    restaurants, one per line of the file, in file order. Each item is
    the value returned by convert_input_to_restaurant for that line.
    """
    restaurants = []
    with open(filename) as yelp:  ## the file is closed even after an error
        for line in yelp:
            restaurants.append(convert_input_to_restaurant(line))
    return restaurants
if __name__ == "__main__":
    restaurants = build_restaurant_list("yelp.txt")
    print(restaurants)

    ## the position is entered as comma-separated numbers
    position = [float(part) for part in raw_input("Position ==> ").split(",")]
    distance = float(raw_input("Distance ==> "))
    rating = float(raw_input("Rating ==> "))
    rType = raw_input("Type ==> ")

    for restaurant in restaurants:
        # restaurant is further from the given position than allowed
        too_far = distance < distance_from_lat_long(position[0], position[1],
                                                    restaurant.latitude,
                                                    restaurant.longitude)
        # skip it when it is too far, rated too low or of another type;
        # "or" stops at the first test that rules the restaurant out
        if (too_far
                or rating > restaurant.average_rating()
                or rType.lower() != restaurant.category.lower()):
            continue
        print(restaurant)
        print("")
Illustrates how to convert a dictionary of hobbies with person as key and set of hobbies as value to a new dictionary with hobby as key, and the set of people with that hobby as value.
Code:
"""
Illustrates how to convert a dictionary of hobbies with
person as key and set of hobbies as value to a new dictionary
with hobby as key, and the set of people with that hobby as
value.
"""
if __name__ == "__main__":
    hobbies = {'Gru': set(['Hiking', 'Cooking']),
               'Edith': set(['Hiking', 'Board Games'])}

    new_dict = {}  ## key: hobby, value: set of people with that hobby
    for person, interests in hobbies.items():
        for h in interests:
            ## setdefault creates the empty set the first time a hobby
            ## is seen and returns the existing set afterwards
            new_dict.setdefault(h, set()).add(person)

    print(new_dict)
Illustrates the deletion from dictionaries and counting values from a dictionary
Code:
"""
Illustrates the deletion from dictionaries and counting values from
a dictionary
"""
if __name__ == "__main__":
    peoples = {
        "Thomas": "red", "Ashok": "green", "Sandy": "red",
        "Allison": "orange", "Fei": "blue", "Natasha": "blue",
        "Brennah": "blue",
    }
    print(peoples)

    peoples["Fei"] = "green"  ## replace the value of an existing key
    print(peoples)

    del peoples["Sandy"]  ## remove a key and its value
    print(peoples)

    ## count the number of people for each color (color: count)
    colors = {}
    for color in peoples.values():
        colors[color] = colors.get(color, 0) + 1

    ## the largest count among all the values of the dictionary
    max_value = max(colors.values())

    ## print every color that reaches that count
    for color, total in colors.items():
        if total == max_value:
            print(color)
Illustrates how to extract the last name from a file and construct a dictionary of last names
Code:
"""
Illustrates how to extract the last name from a file and construct
a dictionary of last names
"""
if __name__ == "__main__":
    ## key: actor last name, value: set of first names with that last name
    last_names = {}
    with open("imdb_data.txt") as imdb:  ## closed automatically
        for line in imdb:
            ## first, split on "|" for different fields
            words = line.strip().split('|')

            ## next, split the name at position zero on ","
            ## to separate out the last and first name,
            ## removing any additional spaces in all names
            actor = [part.strip() for part in words[0].strip().split(',')]

            ## only names with more than one part are recorded: the part
            ## after the first comma is added under the part before it
            if len(actor) > 1:
                last_names.setdefault(actor[0], set()).add(actor[1])

    ## all actors with last name "Bacon" (an empty set if there are none)
    print(last_names.get("Bacon", set()))
Creates a dictionary from the IMDB data in which years for movies is a key and the names of the movies in that year is a value (as movies can be repeated for different actors, using a set is best here).
We can then use this dictionary to:
- print the movies in a given year
- find the number of movies in each year
Code:
"""
Creates a dictionary from the IMDB data in which
years for movies is a key and the names of the movies in that
year is a value (as movies can be repeated for different actors,
using a set is best here).
We can then use this dictionary to:
- print the movies in a given year
- find the number of movies in each year
"""
def get_year_movies(imdb_file="imdb_data.txt"):
    """ Build a dictionary from year to the set of movies of that year.

    imdb_file -- path of the '|'-delimited data file; the movie name is
                 read from the second field of each line and the year
                 from the third. Defaults to "imdb_data.txt".

    Returns a dictionary with the year (an int) as key and the set of
    movie names of that year as value. A set is used because the same
    movie can be repeated on several lines.
    """
    years_and_movies = {}
    with open(imdb_file) as imdb:  ## the file is closed even after an error
        for line in imdb:
            words = line.strip().split('|')
            movie_name = words[1].strip()
            year = int(words[2])
            if year in years_and_movies:
                years_and_movies[year].add(movie_name)
            else:
                years_and_movies[year] = set([movie_name])
    return years_and_movies
def find_busiest_years1(years_and_movies):
    """ Print [number of movies, year] for the year with the most movies.

    One way of finding the top year, using a list and sorting. Slower
    than the second method, as it has complexity n*log(n)+n. Ties on
    the count are resolved in favour of the later year.
    """
    ranked = sorted(([len(titles), year]
                     for year, titles in years_and_movies.items()),
                    reverse=True)
    print(ranked[0])
def find_busiest_years2(years_and_movies):
    """ Print the highest number of movies in a year, then that year.

    Uses just the dictionary, remembering the maximum as we go along.
    Faster than the first method, as its complexity is just n. Prints
    "0 0" for an empty dictionary.
    """
    best_count, best_year = 0, 0
    for year, titles in years_and_movies.items():
        total = len(titles)
        if total > best_count:  ## strictly larger: the first maximum wins
            best_count, best_year = total, year
    print("%s %s" % (best_count, best_year))
def print_movies_in_a_year(years_and_movies):
    """ Ask for a year and print the movies of that year in sorted order.

    Shows how to construct a string from a set of values and print
    strings of a given length: movie names are appended to the current
    line, which is printed as soon as it is longer than 60 characters.
    Whatever is left at the end is printed without its trailing comma.
    A message is printed instead when the year is not in the dictionary.
    """
    max_length = 60
    year = int(raw_input("Enter a year ==> "))
    if year not in years_and_movies:
        print("This year is not found")
        return

    pending = ""  ## used for printing multiple movies in a line
    for movie in sorted(years_and_movies[year]):
        pending = pending + movie + ", "
        if len(pending) > max_length:
            print(pending)
            pending = ""
    print(pending.strip().strip(","))
if __name__ == "__main__":
    ## this dictionary is used for everything that follows
    years_and_movies = get_year_movies()

    ## use the dictionary to print the movies in a given year
    print_movies_in_a_year(years_and_movies)

    ## busiest year, first solution (lists and sorting)
    find_busiest_years1(years_and_movies)

    ## busiest year, second solution (running maximum)
    find_busiest_years2(years_and_movies)
Using the IMDB data, computes people with Bacon number 1: all people who starred in a movie with Kevin Bacon
Code:
"""
Using the IMDB data, computes people with Bacon number 1:
all people who starred in a movie with Kevin Bacon
"""
if __name__ == "__main__":
    actors = {}  ## key:actor, value: set of movies for that actor
    movies = {}  ## key:movie, value: set of actors in that movie

    ## first populate the above dictionaries
    with open('imdb_data.txt') as imdb:  ## closed automatically
        for line in imdb:
            words = line.strip().split('|')
            actor = words[0].strip()
            movie_name = words[1].strip()
            ## setdefault creates the empty set for a key not yet seen
            actors.setdefault(actor, set()).add(movie_name)
            movies.setdefault(movie_name, set()).add(actor)

    ## now find the actors with Bacon degree 1
    bacon_numbers = {0: set(["Bacon, Kevin"]),
                     1: set()}

    ## for each of Kevin Bacon's movies (none if he is not in the data)
    for movie in actors.get("Bacon, Kevin", set()):
        bacon_numbers[1] |= movies[movie]  ## set union all actors for the movie

    ## Remember: Kevin Bacon should not be in degree 1
    bacon_numbers[1] = bacon_numbers[1] - bacon_numbers[0]

    print(bacon_numbers[1])
    print("%s actors" % len(bacon_numbers[1]))
Finds the number of common movies for all pair of actors Then, finds the actors with the highest number of common movies (returns only one pair of actors even if there are ties)
Code:
"""
Finds the number of common movies for all pair of actors
Then, finds the actors with the highest number of common movies
(returns only one pair of actors even if there are ties)
"""
if __name__ == "__main__":
    import itertools

    imdb_file = 'imdb_data.txt'  ## the data file has to be named before use

    actors = {}  ## key: actor name, value: set of movies of that actor

    ## populate the actor dictionary
    with open(imdb_file) as imdb:  ## closed automatically
        for line in imdb:
            words = line.strip().split('|')
            actor = words[0].strip()
            movie_name = words[1].strip()
            actors.setdefault(actor, set()).add(movie_name)

    ## the following finds the max number of common movies for
    ## any pair of actors
    common_count = 0  ## variable to hold the maximum number of common movies
    actor_names = []  ## the names of actors with the current max value

    ## combinations() yields every pair of different actors exactly once,
    ## so no pair is compared a second time in the opposite order
    for actor, actor2 in itertools.combinations(actors, 2):
        common = len(actors[actor] & actors[actor2])
        if common > common_count:  ## strictly larger: the first pair found wins ties
            common_count = common
            actor_names = [actor, actor2]

    print("%s %s" % (common_count, actor_names))
Find smallest two values in a list
Code:
"""
Find smallest two values in a list
"""
import time
import random
def search4(L):
    """
    Return the two smallest values of L as (smallest, second smallest),
    or (None, None) if L has fewer than two values.

    Start with the first two values in order, then go through every
    other element. An element is first compared with the current second
    smallest, so most elements are dismissed after a single comparison.

    O(N) solution -- linear, N is length of the list
    """
    if len(L) < 2:
        return None, None

    smallest, second = L[0], L[1]
    if smallest > second:
        smallest, second = second, smallest

    for position in range(2, len(L)):
        value = L[position]
        if not value < second:
            continue  ## cannot be one of the two smallest
        if value < smallest:
            smallest, second = value, smallest
        else:
            second = value
    return smallest, second
def search1(L):
    """
    Return the two smallest values of L as (smallest, second smallest),
    or (None, None) if L has fewer than two values.

    Start with the first two values in order, then go through every
    other element, comparing it with the smallest value first and with
    the second smallest only if needed.

    O(N) solution - linear, N is length of the list
    """
    if len(L) < 2:
        return None, None

    lowest, runner_up = L[0], L[1]
    if lowest > runner_up:
        lowest, runner_up = runner_up, lowest

    for position in range(2, len(L)):
        value = L[position]
        if value < lowest:
            ## new smallest; the old smallest becomes the second smallest
            lowest, runner_up = value, lowest
        elif value < runner_up:
            runner_up = value
    return lowest, runner_up
def search3(L):
    """
    Return the two smallest values of L as (smallest, second smallest),
    or (None, None) if L has fewer than two values.

    A sorted copy of L is made and its first two elements are
    returned; L itself is left unchanged.

    O(N log N) solution due to sorting, N is length of the list
    """
    if len(L) <= 1:
        return None, None
    ordered = sorted(L)
    return ordered[0], ordered[1]
def search2(L):
    """
    Return the two smallest values of L using the built-in min.

    Works on a copy of L: take the minimum, remove one occurrence of it
    from the copy, and take the minimum of what is left.
    Returns (None, None) for fewer than two items.
    Each step (copy, min, remove, min) is linear, so O(N) overall.
    """
    if len(L) < 2:
        return None, None
    smallest = min(L)            ## O(N)
    remaining = L[:]             ## O(N) copy so L itself is not changed
    remaining.remove(smallest)   ## O(N), drops only the first occurrence
    return smallest, min(remaining)
if __name__ == "__main__":
maxrange = 10000000
## create a random list
L = range(maxrange)
random.shuffle( L )
## time run time of a program
start = time.time()
a,b = search1(L)
end = time.time()
print "Search 1 took", end-start
"""
start = time.time()
a,b = search2(L)
end = time.time()
print "Search 2 took", end-start
start = time.time()
a,b = search3(L)
end = time.time()
print "Search 3 took", end-start
"""
start = time.time()
a,b = search4(L)
end = time.time()
print "Search 4 took", end-start
Testing module for finding the two smallest values
Test cases: number of values in the list: 0, 1, 2, 2+; duplicates or not: min two are the same or not; ordering of values: min two values are ordered or not; location of min two: min two are the first/last values.
Nose module: 1. A set of test functions; each function will be executed once, provided the test module is run properly in the if __name__ == "__main__" component. 2. Each function will assert something (what should be true).
Code:
"""
Testing module for finding the two smallest values
Test cases:
number of values in the list: 0, 1, 2, 2+
duplicates or not: min two are the same or not
ordering of values: min two values are ordered or not
location of min two: min two are the first/last values
Nose module:
1. A set of test functions; each function will be executed once, provided the test module is run properly in the if __name__ == "__main__" component
2. Each function will assert something (what should be true)
"""
import nose
from lec19_search_smallesttwo import *
def test1():
    """Empty list: there is no smallest pair, so both results are None."""
    a,b = search1( [] )
    assert a is None and b is None

def test2():
    """Single value: still no pair, so both results are None."""
    a,b = search1( [1] )
    assert a is None and b is None

def test3():
    """Exactly two values, already in increasing order."""
    a,b = search1( [1,2] )
    assert a==1 and b==2

def test4():
    """Exactly two values, given in decreasing order."""
    a,b = search1( [2,1] )
    assert a==1 and b==2

def test5():
    """More than two values; the two smallest are the last values."""
    a,b = search1( [3,2,1] )
    assert a==1 and b==2

def test6():
    """The two smallest are the first and the last values."""
    a,b = search1( [1,4,5,3,2] )
    assert a==1 and b==2

def test7():
    """Duplicates: the two smallest values are equal."""
    a,b = search1( [1,4,5,1,2] )
    assert a==1 and b==1

if __name__ == "__main__":
    ## let nose collect and run every test function in this module
    nose.runmodule()
Find smallest two values in a list
Code:
"""
Find smallest two values in a list
"""
import time
import random
def search1(L):
    """
    Return the indices (loc1, loc2) of the two smallest values in L.

    loc1 is the position of the smallest value, loc2 the position of the
    second smallest. The first two positions seed the pair, then every
    later position is compared against the values currently at loc1/loc2.
    Returns (None, None) when L has fewer than two items.
    O(N) solution -- one linear scan, N is the length of the list.
    """
    if len(L) < 2:
        return None, None
    ## seed with positions 0 and 1, ordered by the values they hold
    if L[0] > L[1]:
        loc1, loc2 = 1, 0
    else:
        loc1, loc2 = 0, 1
    for pos in range(2, len(L)):
        item = L[pos]
        if item < L[loc2]:
            if item < L[loc1]:
                ## new minimum; the previous minimum becomes second
                loc1, loc2 = pos, loc1
            else:
                loc2 = pos
    return loc1, loc2
def search3(L):
    """
    Return the indices (loc1, loc2) of the two smallest values in L.

    The two values are found with min on a copy of L (removing the first
    minimum before taking the second), then located in the original list
    with index. When both values are equal, the second lookup starts just
    after loc1 so that two different positions are returned.
    Returns (None, None) when L has fewer than two items.
    O(N) solution -- every step is linear, N is the length of the list.
    """
    if len(L) < 2:
        return None, None
    rest = L[:]              ##O(N) to make a copy
    smallest = min(rest)     ##O(N) to find the min
    rest.remove(smallest)    ##O(N) to remove
    runner_up = min(rest)    ##O(N) to find the next min
    loc1 = L.index(smallest)
    ## for a duplicated minimum, skip past loc1 to reach the other copy
    search_from = loc1 + 1 if smallest == runner_up else 0
    loc2 = L.index(runner_up, search_from)
    return loc1, loc2
def search2(L):
    """
    Return the indices (loc1, loc2) of the two smallest values in L.

    Works on a copy of L: locate the minimum, delete it from the copy,
    then locate the minimum of what is left. The second index is measured
    in the shortened copy, so it is shifted by one when it lies at or
    after the deleted position.
    Returns (None, None) when L has fewer than two items.
    O(N) solution -- every step is linear, N is the length of the list.
    """
    if len(L) < 2:
        return None, None
    rest = L[:]                     ##O(N) to make a copy
    loc1 = rest.index(min(rest))    ##O(N) min plus O(N) lookup
    rest.pop(loc1)                  ##O(N): later items shift down by one
    loc2 = rest.index(min(rest))    ##O(N) min plus O(N) lookup
    if loc2 >= loc1:
        loc2 += 1                   ##to account for removed item
    return loc1, loc2
if __name__ == "__main__":
maxrange = 3000000
## create a random list
L = range(maxrange)
random.shuffle( L )
## time run time of a program
start = time.time()
a,b = search1(L)
end = time.time()
print "Search 1 took", end-start
start = time.time()
a,b = search2(L)
end = time.time()
print "Search 2 took", end-start
start = time.time()
a,b = search3(L)
end = time.time()
print "Search 3 took", end-start
Testing module for finding index of two smallest values
Test cases: number of values in the list: 0, 1, 2, 2+; duplicates or not: min two are the same or not; ordering of values: min two values are ordered or not; location of min two: min two are the first/last values.
Nose module: 1. A set of test functions; each function will be executed once, provided the test module is run properly in the if __name__ == "__main__" component. 2. Each function will assert something (what should be true).
Code:
"""
Testing module for finding index of two smallest values
Test cases:
number of values in the list: 0, 1, 2, 2+
duplicates or not: min two are the same or not
ordering of values: min two values are ordered or not
location of min two: min two are the first/last values
Nose module:
1. A set of test functions; each function will be executed once, provided the test module is run properly in the if __name__ == "__main__" component
2. Each function will assert something (what should be true)
"""
import nose
from lec19_indexof_smallesttwo import *
def test1():
    """Empty list: there are no positions to report, both results are None."""
    a,b = search1( [] )
    assert a is None and b is None

def test2():
    """Single value: still no pair, both results are None."""
    a,b = search1( [1] )
    assert a is None and b is None

def test3():
    """Two values in increasing order: indices come back as 0, 1."""
    a,b = search1( [1,2] )
    assert a==0 and b==1

def test4():
    """Two values in decreasing order: indices come back as 1, 0."""
    a,b = search1( [2,1] )
    assert a==1 and b==0

def test5():
    """More than two values; the two smallest sit at the end of the list."""
    a,b = search1( [3,2,1] )
    assert a==2 and b==1

def test6():
    """The two smallest are at the first and the last position."""
    a,b = search1( [1,4,5,3,2] )
    assert a==0 and b==4

def test7():
    """Duplicated minimum: two different positions must be returned."""
    a,b = search1( [1,4,5,1,2] )
    assert a==0 and b==3

if __name__ == "__main__":
    ## let nose collect and run every test function in this module
    nose.runmodule()
This module implements different sort functions. Each function returns the sorted list, but ins_sort also changes the input list while merge_sort does not.
Merge sort is implemented two ways: 1. Iterative merge sort: merge_sort 2. Recursive merge sort: merge_sort_rec
Code:
"""
This module implements different sort functions.
Each function returns the sorted list, but ins_sort also
changes the input list while merge_sort does not.
Merge sort is implemented two ways:
1. Iterative merge sort: merge_sort
2. Recursive merge sort: merge_sort_rec
"""
import random
def ins_sort(L):
    """
    Insertion sort implementation.

    Sorts L in place in increasing order and returns that same list.
    For each position, the value found there is held aside, larger values
    to its left are shifted one slot to the right, and the held value is
    dropped into the gap that opens up.
    """
    for end in range(1, len(L)):
        pending = L[end]   ##value being inserted into the sorted prefix
        slot = end
        while slot > 0 and L[slot-1] > pending:
            L[slot] = L[slot-1]   ##shift the larger value to the right
            slot -= 1
        L[slot] = pending
    return L
def merge(L1, L2):
    """Assume L1 and L2 are sorted,
    merge and return a single new sorted list.

    Neither input list is modified: two cursors walk through L1 and L2
    instead of popping items off their fronts, which also keeps the merge
    linear in len(L1) + len(L2) (list.pop(0) is itself a linear operation).
    When the two front items compare equal, the item from L2 is taken first.
    """
    M = []
    i, j = 0, 0
    n1, n2 = len(L1), len(L2)
    while i < n1 and j < n2:
        if L1[i] < L2[j]:
            M.append( L1[i] )
            i += 1
        else:
            M.append( L2[j] )
            j += 1
    ## at most one of the two lists still has items left over
    M.extend( L1[i:] )
    M.extend( L2[j:] )
    return M
def merge_sort(L):
    """ Iterative merge sort solution, uses merge as a subroutine.

    Returns a new sorted list; L itself is not modified.
    Starts from one single-item list per value of L and repeatedly merges
    neighbouring lists in pairs until only one list remains. Pairs are
    picked by index rather than by popping from the front of the work
    list, since list.pop(0) is linear and would make each pass quadratic.
    """
    if len(L)==0:
        return []
    tomerge = [ [item] for item in L ]
    ## get pairs of items from tomerge
    ## merge and put in a new list
    while len(tomerge) > 1:
        tomerge_new = []
        for k in range(0, len(tomerge)-1, 2):
            tomerge_new.append( merge(tomerge[k], tomerge[k+1]) )
        if len(tomerge) % 2 == 1:
            ## odd count: the last list has no partner in this pass
            tomerge_new.append( tomerge[-1] )
        tomerge = tomerge_new
    return tomerge[0]
def merge_sort_rec(L):
    """ Recursive merge sort solution, uses merge as a subroutine.

    Returns a new sorted list; L itself is not modified. Lists of length
    0 or 1 are already sorted and are returned as a copy, so the result
    never aliases the input. Longer lists are divided in half, each half
    is sorted recursively, and the two sorted halves are merged.
    """
    if len(L) <= 1: ##a list of length 1 is already sorted
        return L[:]
    ## lists 2 or more items: divide in half, sort each sublist and merge
    mid = len(L) // 2 ## floor division, so mid is always a valid slice index
    L1 = merge_sort_rec( L[:mid] )
    L2 = merge_sort_rec( L[mid:] )
    return merge(L1, L2)
if __name__ == "__main__":
x = range(10)
y = x[:]
random.shuffle(y)
z = merge_sort_rec(y)
print z
print x==z
Testing code for Computer Science 1, Lecture 20 on sorting. This assumes that the sort functions are all in file sort_sol.py, each taking one list as its only argument, and that their names are ins_sort and merge_sort. Assumes that methods return a sorted list.
All tests are based on random permutations of integers. Also try permutations that are almost sorted by only switching few pairs. We leave that as an exercise.
Code:
'''
Testing code for Computer Science 1, Lecture 20 on sorting. This
assumes that the sort functions are all in file sort_sol.py, each taking
one list as its only argument, and that their names are ins_sort
and merge_sort. Assumes that methods return a sorted list.
All tests are based on random permutations of integers. Also try
permutations that are almost sorted by only switching few pairs.
We leave that as an exercise.
'''
import sort_sol
import time
import random
def run_and_time(name, sort_fcn, v, known_v):
'''
Run the function passed as sort_fcn, timing its performance and
double-checking if it correct. The correctness check is probably
not necessary.
'''
print "Testing " + name
t0 = time.time()
x = sort_fcn(v)
t1 = time.time()
print "Time: %.4f seconds" %(t1-t0)
print "Is correct?", x==known_v
print
def run_and_time_python_sort(v):
'''
Run and time the Python list sort function on the list.
'''
print "Running Python's list sort function"
t0 = time.time()
v.sort()
t1 = time.time()
print "Time: %.4f seconds" %(t1-t0)
print
####################################################
if __name__ == '__main__':
n = int(raw_input("Enter the number of values ==> "))
print "----------"
print "Running on %d values" %n
print "----------"
v = range(n)
v1 = v[:]
random.shuffle(v1)
v2 = v1[:]
v3 = v1[:]
v4 = v1[:]
#run_and_time("selection sort", sort_sol.ins_sort, v1, v )
run_and_time("merge sort", sort_sol.merge_sort, v2, v )
run_and_time("recursive merge sort", sort_sol.merge_sort_rec, v3, v )
# passing functions as an arg to a fcn
run_and_time_python_sort(v4 )
Example recursive functions
Code:
"""
Example recursive functions
"""
def blast(N):
"""
Simple example of recursion to show how the call stack
works with printing.
"""
if N > 0:
blast(N-1)
print N
else:
print "Blast off!"
def factorial(N):
    """
    Return N! computed recursively.

    Idea: N! = N * (N-1)!, with every N <= 1 treated as the base
    case whose value is 1.
    """
    return 1 if N <= 1 else N * factorial(N-1)
def fib(N):
    """
    Return the Nth fibonacci number, computed recursively.

    Any N below 2 is returned unchanged (so fib(0) is 0 and fib(1) is 1);
    otherwise the result is fib(N-1) + fib(N-2).
    It is much better to use an iterative solution for this.
    """
    return N if N < 2 else fib(N-1) + fib(N-2)
if __name__ == "__main__":
for i in range(10):
print fib(i),
print
This module draws Sierpinski triangles up to a given depth using the Tkinter module. It illustrates the use of recursion in drawing self-similar patterns in smaller and smaller regions of the larger triangle.
See:
http://en.wikipedia.org/wiki/Sierpinski_triangle
Code:
"""
This module draws Sierpinski triangles up to a given depth
using the Tkinter module. It illustrates the use of recursion in
drawing self-similar patterns in smaller and smaller regions of the
larger triangle.
See:
http://en.wikipedia.org/wiki/Sierpinski_triangle
"""
import Tkinter as tk
import math
def sierpinski(chart_1, lowleft, top, lowright, level, maxlevel):
    """Recursive function to draw Sierpinski triangles in chart_1
    within the triangle whose corners are lowleft, top, lowright
    (each an (x, y) pair).

    Each call paints the whole triangle red, paints the triangle joining
    the midpoints of its three sides white, refreshes the canvas, and
    then recurses into the three corner triangles with level increased
    by one. Nothing is drawn once level equals maxlevel.
    """
    if level == maxlevel:
        return  ##Base case to terminate the process.

    def midpoint(p, q):
        ##point halfway between p and q
        return (p[0]+q[0])/2, (p[1]+q[1])/2

    chart_1.create_polygon([lowleft, top, lowright], fill="red")
    leftmid = midpoint(lowleft, top)
    rightmid = midpoint(lowright, top)
    bottommid = midpoint(lowright, lowleft)
    chart_1.create_polygon([leftmid, rightmid, bottommid], fill="white")
    chart_1.update()
    ##Recursive calls to redraw triangles in three corners of the
    ##current triangle area
    corners = (
        (lowleft, leftmid, bottommid),
        (leftmid, top, rightmid),
        (bottommid, rightmid, lowright),
    )
    for left, apex, right in corners:
        sierpinski(chart_1, left, apex, right, level+1, maxlevel)
def restart(chart):
    """Redraws the Sierpinski triangle on the given canvas, increasing
    the depth each time it is called.

    Clears everything drawn on chart, draws the triangles from level 0
    up to the depth stored in the module-level one-item list
    maxlevel_var, and then raises that stored depth by one for the next
    call. The canvas passed in is the one that gets cleared, so the same
    canvas is both cleared and redrawn.
    """
    chart.delete(tk.ALL)
    ## top corner at (300, 600-300*sqrt(3)) above the 600-wide base
    top = (300, 600-300*math.sqrt(3))
    sierpinski(chart, (0,600), top, (600,600), 0, maxlevel_var[0])
    maxlevel_var[0] += 1
if __name__ == "__main__":
    root = tk.Tk()
    root.title("Sierpinski Recursion Example")
    chart_1 = tk.Canvas(root, width=600, height=600, background="white")
    chart_1.grid(row=0, column=0)
    ## Initially max level is 1, which will draw
    ##a simple triangle with an inverted triangle inside.
    ## Kept in a one-item list so that restart can update it in place.
    maxlevel_var = [1]
    restart(chart_1) ## Draw the Sierpinski triangles once
    ## frame holding the two control buttons
    root.frame = tk.Frame(root)
    root.frame.button = tk.Button(root.frame, text="quit",
                                  command=root.destroy)
    root.frame.button2 = tk.Button(root.frame, text="draw again!",
                                   command=lambda: restart(chart_1))
    root.frame.button.grid()
    root.frame.button2.grid()
    root.frame.grid()
    root.mainloop()
Finding the modes of a list:
- Given a list L of integers, find the value that occurs most often
- Should function change the input list? NO
- Could there be more than one value that is the mode? YES
- N: number of values in the list
- M: number of distinct values in the list ( M < N or M << N ?)
- What other variations of this function I might be interested in?
Algorithm:
- Find the frequency of each value
- Find the max frequency
- All values with that frequency
Method 1: Dictionary.
1. Dictionary: keys are distinct values, values are counts
2. Find max of all values
3. Go through each key, and if the value is the max value, add key to the output
Code:
""" Finding the modes of a list:
- Given a list L of integers, find the
value that occurs most often
- Should function change the input list? NO
- Could there be more than one value that
is the mode? YES
- N: number of values in the list
- M: number of distinct values in the list ( M < N or M << N ?)
- What other variations of this function I might be interested in?
Algorithm:
1. Find the frequency of each value
2. Find the max frequency
3. All values with that frequency
Method 1: Dictionary.
1. Dictionary: keys are distinct values, values are counts
2. Find max of all values
3. Go through each key, and if the value
is the max value, add key to the output
Method 2: Set.
1. Find the distinct values using set(L)
2. Count the frequency for each value, put in a list and sort
3. Return the top elements in the list
Method 3: List.
1. Sort a copy of the list
2. Go through and count each item
3. Keep track of the mode and the corresponding values for
the mode (while iterating through the list)
"""
import time
import random
def modes_dict(L):
    """ Find the modes of L with a dictionary of counts.

    Returns (mode_vals, mode_count): the list of values that occur most
    often in L and how many times each of them occurs. The values are
    listed in the dictionary's iteration order. An empty L gives ([], 0).
    Overall complexity: O(N+M)
    """
    if len(L) == 0:
        return [], 0
    tally = {}
    for value in L:                             ## O(N)
        tally[value] = tally.get(value, 0) + 1  ## O(1)
    ##Complexity so far: O(N)
    top = max( tally.values() )                 ## O(M)
    ## M keys, so O(M)
    winners = [value for value in tally if tally[value] == top]
    return winners, top
def modes_set(L):
    """ Find the modes of L by counting each distinct value separately.

    Returns (mode_vals, mode_count): the values that occur most often in
    L, listed from largest to smallest value, and how many times each of
    them occurs. An empty L gives ([], 0).
    Overall complexity: O(N+N*M+Mlog M)
    """
    if len(L) == 0:
        return [], 0
    ##set(L) is O(N) and finds the M distinct items; L.count is O(N) for
    ##each of them; sorting the M (count, item) pairs is O(M log M)
    ranked = sorted( ((L.count(item), item) for item in set(L)),
                     reverse=True )
    top = ranked[0][0]
    winners = []
    ##pairs come highest count first, so stop at the first lower count
    for (freq, item) in ranked:
        if freq != top:
            break
        winners.append( item )
    return winners, top
def modes_list(L):
    """ Find the modes of L by sorting a copy and measuring runs.

    Returns (mode_vals, mode_count): the values that occur most often in
    L, listed in increasing order, and how many times each of them
    occurs. L itself is not modified. An empty L gives ([], 0).
    Overall complexity: O(N log N + N)
    """
    if len(L) == 0:
        return [], 0

    def close_run(val, length, best_vals, best_count):
        ##fold a finished run of equal values into the best-so-far answer
        if length > best_count:
            return [val], length
        if length == best_count:
            best_vals.append( val )
        return best_vals, best_count

    ordered = sorted(L)                 ## O(N log N)
    mode_vals, mode_count = [], 0
    run_val, run_len = ordered[0], 1
    for pos in range(1, len(ordered)):  ## O(N)
        item = ordered[pos]
        if item == run_val:
            run_len += 1
        else:
            mode_vals, mode_count = close_run(run_val, run_len,
                                              mode_vals, mode_count)
            run_val, run_len = item, 1
    ##the last run is still open when the loop ends
    return close_run(run_val, run_len, mode_vals, mode_count)
def time_alg(f, L):
start = time.time()
f(L)
end = time.time()
print "%s: %f seconds" %( f.__name__, end-start)
def random_list(N, divider):
    """ Return a list of N random integers drawn from 1 .. N//divider.

    The number of distinct values (M) is therefore at most N/divider.
    Floor division is used for the upper bound so that random.randint
    always receives an integer, whatever division semantics are active.
    """
    upper = N // divider  ## largest value that can be drawn
    return [random.randint(1, upper) for _ in range(N)]
if __name__ == "__main__":
L = [1,2,3,4,2,2,4,4,5,6]
L2 = [1,2,3,4,2,2,2,4,4,5,6]
print "Testing with different N, fixed M"
print
N = 1000
divider = 10
for i in range(3):
L = random_list(N, divider)
print "L has", N, "values", N/divider, "distinct values"
time_alg(modes_dict, L)
time_alg(modes_set, L)
time_alg(modes_list, L)
print
N *= 10
divider *= 10
print "Testing with different M (number of distinct values), fixed N"
print
N = 100000
divider = 100
for i in range(3):
L = random_list(N, divider)
print "L has", N, "values", N/divider, "distinct values"
time_alg(modes_dict, L)
time_alg(modes_set, L)
time_alg(modes_list, L)
print
divider *= 10