Code Developed in CSCI-1100¶
Lecture 1¶
Module: lec1_three_doubles
— Finds three consecutive pairs of double letters¶
Find all words containing three consecutive pairs of double letters in a file of all English words located at: http://thinkpython.com/code/words.txt
Modules used: urllib
Author: Sibel Adali <adalis@rpi.edu>, Chuck Stewart <stewart@cs.rpi.edu>
Returns: All words matching condition and the count of found words
Pseudo Code:
open the file from the web with all the words in English
for each word in the file:
for all positions l in the word
if letters at positions (l and l+1) and (l+2 and l+3) and
(l+4 and l+5) are the same then
output word and increment the count
Code:
""" Find all words containing three consecutive pairs of double letters
in a file of all English words located at:
http://thinkpython.com/code/words.txt
**Modules used:** :py:mod:`urllib`
**Author**: Sibel Adali <adalis@rpi.edu>, Chuck Stewart <stewart@cs.rpi.edu>
**Returns:** All words matching condition and the count of found words
**Pseudo Code**::
open the file from the web with all the words in English
for each word in the file:
for all positions l in the word
if letters at positions (l and l+1) and (l+2 and l+3) and
(l+4 and l+5) are the same then
output word and increment the count
"""
__version__ = '1'
import urllib
def three_double(word):
    """Report whether ``word`` has three back-to-back pairs of doubled letters.

    Returns True when some six-character window of the word reads like
    ``aabbcc`` (for example the ``ookkee`` in ``bookkeeper``) and False
    otherwise.
    """
    # Words shorter than six letters produce no windows, so any() is False.
    return any(
        word[start] == word[start + 1]
        and word[start + 2] == word[start + 3]
        and word[start + 4] == word[start + 5]
        for start in range(len(word) - 5)
    )
# Comments that fit in a single line can be put in this format.
# Anything after a single pound sign is ignored.
# Main body of the program starts here
# Download the word list, print every word that has three consecutive
# double letters, then report how many such words were seen.
word_url = 'http://thinkpython.com/code/words.txt'
word_file = urllib.urlopen(word_url)
count = 0
try:
    for word in word_file:
        # strip() removes the trailing newline together with any other
        # surrounding whitespace, so no second strip is needed.
        word = word.strip()
        if three_double(word):
            print(word)
            count = count + 1
finally:
    # Release the network connection even if reading fails part-way.
    word_file.close()
if count == 0:
    print('No words found')
else:
    print('%d words are found' % count)
Lecture 4¶
Module: lec4_examples
— Examples programs from Lecture 4¶
Code:
"""
Write a program to
read radius of a circle in float
compute its area and
print it.
"""
import math
# Read the radius as text and convert it to a float in a single step.
radius = float(raw_input("Radius ==> "))
## or, as two separate steps:
## radius = raw_input("Radius ==> ")
## radius = float(radius)
area = radius ** 2 * math.pi
# First the default float rendering, then rounded to two decimals.
print("The area of a circle is: %s" % area)
print("The area of a circle is: {0:.2f}".format(area))
"""
Write a program to
read the first name and last name of a person
print name in two ways:
firstname lastname
lastname, firstname
"""
# Each name is echoed right after it is read.
fname = raw_input("First name ==> ")
print(fname)
lname = raw_input("Last name ==> ")
print(lname)
# "firstname lastname", shown twice.
print(" ".join([fname, lname]))
print("%s %s" % (fname, lname))
# "lastname, firstname"; the second line has a space on both sides of
# the comma, which is what a comma-separated print produces.
print(", ".join([lname, fname]))
print(" ".join([lname, ",", fname]))
## formatted output
print("{0} {1}".format(fname, lname))
print("{0}, {1}".format(lname, fname))
"""
Write a program to read three values,
find and print their
average,
min and max,
average of the smallest two values.
"""
val1 = float(raw_input("Value 1 ==> "))
val2 = float(raw_input("Value 2 ==> "))
val3 = float(raw_input("Value 3 ==> "))
# All statistics are computed first, then reported in the same order
# as before: average, min, max, average of the two smallest.
total = val1 + val2 + val3
avg = total / 3
minval = min(val1, val2, val3)
maxval = max(val1, val2, val3)
# Removing the largest value from the total leaves the two smallest.
avgmintwo = (total - maxval) / 2
print("The average value is: {0:.2f}".format(avg))
print("The min value is: {0:.2f}".format(minval))
print("The max value is: {0:.2f}".format(maxval))
print("Average of min two values is: {0:.2f}".format(avgmintwo))
# Exchange every "a" and "e" in the name.  The placeholder "1" (which
# does not occur in the text) keeps the first replacement from being
# undone by the second one.
##a -> 1, e->a, 1->e
name2 = "Rensselaer Polytechnic Institute"
for old, new in (("a", "1"), ("e", "a"), ("1", "e")):
    name2 = name2.replace(old, new)
print(name2)
# Turn a phrase into a hashtag: capitalize each word, drop the spaces and
# put "#" in front.  Done twice: step by step, then as one expression.
word = 'Bring back the swarm'
capitalized_words = word.title().split(" ")
word = "#" + "".join(capitalized_words)
print(word)
word = 'Bring back the swarm'
word = "#{0}".format(word.title().replace(" ", ""))
print(word)
Lecture 5¶
Module: lec5_examples
— Examples programs from Lecture 5¶
Code:
"""
Write a function that takes the name of a person
and places all vowels outside (in alphabetical order)
For example:
meeseek
eeeemsk
amos
aoms
This illustrates the use of a function that calls a function
and a function with multiple parameters.
"""
def put_outside_letter(word, letter):
    """Move every occurrence of ``letter`` to the front of ``word``.

    The remaining characters keep their relative order.
    """
    occurrences = word.count(letter)
    remainder = word.replace(letter, "")
    return letter * occurrences + remainder


def put_outside(word):
    """Return ``word`` with its lowercase vowels moved to the front.

    The vowel groups end up in alphabetical order (a, e, i, o, u),
    followed by the remaining letters in their original order.
    """
    # Each pass prepends one vowel group, so the vowels are handled in
    # reverse alphabetical order to leave "a" at the very front.
    for vowel in "uoiea":
        word = put_outside_letter(word, vowel)
    return word
######################### Main body of code
# Read one word, move its vowels to the front and show the result.
inputword = raw_input("Enter a word ==> ")
outword = put_outside(inputword)
print(outword)
"""
Examples functions from the exercise
"""
import math
def frame_word(word):
    """Return ``word`` framed by asterisks as a three-line string.

    The top and bottom borders are four characters wider than the word;
    the middle line is the word between "* " and " *".  The result has
    no trailing newline.
    """
    border = "*" * (len(word) + 4)
    middle = "* " + word + " *"
    return "\n".join([border, middle, border])
def area_circle(radius):
    """Return the area of a circle with the given radius."""
    return math.pi * radius ** 2


#################
# Quick check: area of a circle of radius 2.
print(area_circle(2))
"""
Write a function that computes the length of
the hypotenuse of a triangle, given the lengths
of its legs (remember: A^2+B^2 = C^2)
Use this function to read the lengths of edges and
compute and print the hypotenuse.
Show the program structure and functions
"""
import math
def hypotenuse(a, b):
    """Return the hypotenuse of a right triangle whose legs are a and b."""
    sum_of_squares = a ** 2 + b ** 2
    return math.sqrt(sum_of_squares)
################## main body of the program
# Read both legs, compute the hypotenuse and print it with two decimals.
aval = float(raw_input("Enter a ==> "))
bval = float(raw_input("Enter b ==> "))
cval = hypotenuse(aval, bval)
print("C value is {0:.2f}".format(cval))
"""
Write a function that generates a thank you note
given the person who gave the gift and the gift.
"""
def send_thankyou(person, gift):
    """Return a four-line thank-you note to ``person`` for ``gift``.

    The lines are joined with newlines; there is no trailing newline.
    """
    lines = [
        "Dear %s," % (person,),
        " Thank you for your thoughtful gift of %s." % (gift,),
        "It was very nice of you to think of me.",
        "My first weeks at Rensselaer have been crazy.",
    ]
    return "\n".join(lines)


########### main body of the program
print(send_thankyou("Uncle", "rocket ship"))
Lecture 6¶
Module: lec6_examples
— Examples programs from Lecture 6¶
Code:
"""
Write a function that generates a thank you note
given the person who gave the gift and the gift.
This is an example of a function that returns
nothing. Notice how this function is called differently
than a function that returns a value.
"""
def send_thankyou(person, gift):
    """Print a thank-you note to ``person`` for ``gift``.

    Nothing is returned (the result of a call is None); the note goes
    straight to standard output.
    """
    template = ("Dear %s,\n Thank you for your thoughtful gift of %s.\n"
                "It was very nice of you to think of me.\n"
                "My first weeks at Rensselaer have been crazy.")
    print(template % (person, gift))
def send_thankyou2(person, gift):
    """Return a thank-you note to ``person`` for ``gift`` as one string.

    Nothing is printed here; the caller decides what to do with the text.
    """
    greeting = "Dear %s,\n" % (person,)
    thanks = " Thank you for your thoughtful gift of %s.\n" % (gift,)
    closing = ("It was very nice of you to think of me.\n"
               "My first weeks at Rensselaer have been crazy.")
    return greeting + thanks + closing
########### main body of the program
## calling function that returns nothing: it prints the note itself,
## so the call stands alone as a statement
send_thankyou("Uncle", "rocket ship")
print("")
## calling function that returns a string: the caller prints the result.
## (Printing the result of send_thankyou here would show the note
## followed by "None", because that function returns nothing.)
print(send_thankyou2("Uncle", "rocket ship"))
mins = int(raw_input("How long did you work out today (mins)? "))
print("You worked out %d minutes" % mins)
# Three independent tests; exactly one of them holds for any value.
if mins < 20:
    print("You can benefit more from a longer exercise")
    print("You can work even harder next time")
if 20 <= mins < 60:  #### if mins between 20 and 60
    print("That is great!")
    print("Great job!")
if mins >= 60:
    print("You are crushing it!")
    print("Keep up the great work!")
######################################
### Alternate equivalent solution ###
######################################
mins = int(raw_input("How long did you work out today (mins)? "))
print("You worked out %d minutes" % mins)
if mins >= 60:
    print("You are crushing it!")
    print("Keep up the great work!")
elif mins >= 20:  #### mins is between 20 and 60 here
    print("That is great!")
    print("Great job!")
else:  #### only mins < 20 is left; "else" takes no condition of its own
    print("You can benefit more from a longer exercise")
    print("You can work even harder next time")
Lecture 7¶
Module: lec7_examples
— Examples programs from Lecture 7¶
Code:
"""Example of using tuples to return two things
or make multiple assignment
"""
def return_two_things():
    """Return a string and an integer packed into one tuple."""
    pair = ('a', 2)
    return pair


########
# Keep the result as a single tuple ...
x = return_two_things()
print(x)
# ... or unpack it into two names at once.
name, cnt = return_two_things()
print("%s %s" % (name, cnt))
## Swap two values using multiple assignment
a, b = 4, 3
print("%s %s" % (a, b))
a, b = b, a  ## swap values of a and b
print("%s %s" % (a, b))
"""
Purpose: Example image program
Reads an image, crops four equal sizes images
from it, pastes them into another image in
different order and saves the resulting image
The image used in this program can be found on Piazza under
Resources->Resources
"""
from PIL import Image
im = Image.open("swarm1.jpg")
for label, value in (("Size", im.size), ("Mode", im.mode),
                     ("Format", im.format)):
    print("%s %s" % (label, value))
# Cut four 300x400 tiles out of the top-left 600x800 area:
# im1 = top-left, im2 = top-right, im3 = bottom-left, im4 = bottom-right.
# NOTE(review): the boxes assume the image is at least 600x800 -- confirm.
im1, im2, im3, im4 = [
    im.crop(box)
    for box in ((0, 0, 300, 400), (300, 0, 600, 400),
                (0, 400, 300, 800), (300, 400, 600, 800))
]
# Paste the tiles onto a white canvas in a different arrangement.
imnew = Image.new("RGB", (600, 800), "White")
for tile, corner in ((im3, (0, 0)), (im1, (300, 0)),
                     (im2, (0, 400)), (im4, (300, 400))):
    imnew.paste(tile, corner)
imnew.save("scrambled_swarm.jpg")
"""
This module prints boolean algebra tables
Save this in a file called ``truth_table.py``
"""
def print_and_table():
    """Print the AND truth table: a heading line followed by four rows."""
    rows = (
        "C1 AND C2 (ex:1>2 AND 3>2)",
        "TRUE AND TRUE = TRUE",
        "FALSE AND TRUE = FALSE",
        "TRUE AND FALSE = FALSE",
        "FALSE AND FALSE = FALSE",
    )
    for row in rows:
        print(row)
def return_or_table():
    """Return the OR truth table as one multi-line string.

    The heading and the four rows are separated by newlines; there is no
    trailing newline.
    """
    rows = [
        "C1 OR C2 (ex:1>2 OR 3>2)",
        "TRUE OR TRUE = TRUE",
        "FALSE OR TRUE = TRUE",
        "TRUE OR FALSE = TRUE",
        "FALSE OR FALSE = FALSE",
    ]
    return "\n".join(rows)
Lecture 9¶
Module: lec9_examples
— Examples programs from Lecture 9¶
Code:
"""
This is a general program for practicing different loop
methods. The following are examples of:
1. Loops that count up or down, loops that end
Loop block
Printing (print all on one line)
Changing a list (make upper case)
Accumulate a value
Count by 1, 3 (print three in a line, check
for three letters in a word, ending conditions?)
Is it true that there are two consecutive repeated letters?
Count all farmer's markets in Albany
2. Loops that are nondeterministic
Depends on an external condition
(while user does not say stop)
------ WE will continue with these the next time
Depends on a complex condition
(while found or end of list, farmer's market)
3. Double loops
Find all possible pairs of agents
Find pairs of agents with the same first letter in their name
"""
# Sample data shared by the loop examples that follow.
months = [
    'jan', 'feb', 'mar', 'apr', 'may', 'jun',
    'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
]
agents = [
    'skye', 'may', 'fitz', 'simmons', 'caulson',
    'hunter', 'mack', 'morse', 'triplett',
    'hartley', 'deathlok', 'koenig',
    'gonzales', 'fury',
]
word = 'bookkeeper'
# Number the months from 1 and show each with a capital first letter.
for number, month in enumerate(months, 1):
    print("Month: %d is %s" % (number, month.capitalize()))
# The list itself is unchanged: capitalize() returns a new string.
print(months)
print("")
print(agents)
## Capitalize the values in list "agents"
# Writing back through the index changes the list itself.
for position, agent in enumerate(agents):
    agents[position] = agent.capitalize()
## Check if capitalized
print("")
print(agents)
##Count how many agents have first names that start with S
##Accumulation of values
cnt = sum(1 for agent in agents if agent[0] == 'S')
print("%d agents with first name starts with 'S'" % cnt)
##Find if there any agents who name starts with F
##Print Yes if any agent with such name, No otherwise
# Every agent is examined, exactly as in a loop without an early exit.
first_letters = [agent[0] for agent in agents]
found = 'F' in first_letters
print('Yes' if found else 'No')
print("")
print("This is a loop that counts up to 7")
# Prints 1 through 7, one number per line.
for i in range(1, 8):
    print(i)
print("")
print("This is a loop that counts 10 down to 1")
# Prints 10 down to 1, one number per line.
for i in range(10, 0, -1):
    print(i)
print("")
print("This is loop that counts by 2, starting with 1 and ending with 19")
i = 1
while i <= 20:
    print(i)
    # The step statement was cut off after "+="; counting by 2 from 1
    # gives 1, 3, ..., 19 and the loop stops once i reaches 21.
    i += 2
"""
The Farmer's market CSV file is available on Piazza under
Resources->Resources->lec9overview.zip
Get list of Farmer's Markets, each market has the
following fields:
County Market Name Location
Address Line 1 City State Zip Contact Phone
Market Link Operation Hours Operation Season
FMNP Operating Months
Latitude Longitude EBT/SNAP
Location Points
"""
def get_markets(filename='Farmers_Markets.csv'):
    """Read the farmers' market CSV file and return its data rows.

    filename: path of the CSV file to read; defaults to the course file
        ``Farmers_Markets.csv`` in the current directory.

    Returns a list of lists: one inner list per line after the header,
    holding that line's comma-separated fields as strings.

    The file is closed as soon as it has been read.  Lines may end in
    "\\r", "\\n" or "\\r\\n".  Fields are split on every comma, so a
    quoted field that itself contains a comma is split as well.
    """
    with open(filename) as market_file:
        rows = market_file.read().splitlines()
    # rows[0] is the header line and is not part of the data.
    return [row.strip().split(",") for row in rows[1:]]
## markets is a list of lists
markets = get_markets()
# Show the first two rows as a sample of the data.
print(markets[0])
print(markets[1])
# County names are compared with only their first letter capitalized.
county = raw_input("Input a county ==> ").capitalize()
## Find and print the names of all markets in the chosen county
## (field 0 is the county, field 1 the market name)
## Display number of found markets
cnt = 0
for market in markets:
    if market[0] == county:
        print(market[1])
        cnt += 1
print("%d markets found in %s" % (cnt, county))
""" This program checks given a word, whether it has
one or two double consecutive letters. This is an example
of how to iterate over the letters of a word using a while
loop.
"""
word = 'bookkeeper'
##Checks for single double letter
# Pair every letter with the one that follows it.
found = False
for current, following in zip(word, word[1:]):
    if current == following:
        found = True
if found:
    print("The word has a single double letter")
# The sample word was misspelled 'ballon', which has only one double
# letter, so the check below could never succeed; 'balloon' has "ll"
# immediately followed by "oo".
word = 'balloon'
##Checks for double double letter
i = 0
found = False
while i < len(word) - 3:
    # Two adjacent pairs: letters (i, i+1) match and (i+2, i+3) match.
    if word[i] == word[i+1] and word[i+2] == word[i+3]:
        found = True
    i += 1
if found:
    print("The word has a double double letter")
""" This program shows how to write a loop that ends
on a given user input. So, we must make sure that
while loop executes once initially and also that we
set the conditions that would stop the loop manually
when the correct input is given.
"""
### write a loop that reads user input, until the
### user types stop
# The flag starts out False so the loop body runs at least once; it
# becomes True only when the user types exactly "stop".
finished = False
while not finished:
    cmd = raw_input("Enter a command (stop to stop) => ")
    finished = (cmd == 'stop')
Lecture 10¶
Module: lec10_examples
— Examples programs from Lecture 10¶
Code:
"""
Overview of Lecture 10, understanding the difference between
list aliasing and copying
Simple value: int, float, string, Boolean, tuple
Container: list (set and dictionary)
a = b
If b has a simple value, then we make a copy
of its value and assign to a
If b is a list (or any container), a is an
alias to b [they point to the same object]
If variable b is passed as an argument to a
function, if b has a simple value, then we
make a copy of its value
Otherwise, we create an alias to the same
object
The following functions create a new list (they are not aliases):
Concatenation: x+y
Replication: x*10
Shallow Copy: list(x)
Slicing: x [i:j]
The copy is shallow! This means that only the first level of
values are copied in the list.
"""
## Example 1: Since each animal is a string, the value is copied into
## variable a in the for loop! Hence, capitalizing a does not change
## the value in the list
animals = ['cat', 'dog', 'pig', 'unicorn', 'dolphin', 'fish', 'rabbit']
for animal in animals:
    # Rebinding the loop variable to a new string leaves the list
    # element untouched.
    animal = animal.capitalize()
    print(animal)
# Still all lower case.
print(animals)
## Example 2: Since each value in the list is also a list, the variable a becomes an
## alias for each sublist in the for loop. Changing a in the loop also changes the
## values in the list
values = [[1, 2], [3, 4]]
for sublist in values:
    # sublist is the same object as the element inside values, so
    # growing it in place also changes values.
    sublist.extend([100])
    print(sublist)
print(values)
""" This program illustrates the use of functions that take
as input a list and while loops that end on an external condition
"""
def cap_value(mystr):
    """Return a capitalized copy of ``mystr``.

    The caller's string is not modified, so the result must be returned.
    """
    return mystr.capitalize()
def arrange_values(mylist):
    """Sort ``mylist`` in place into ascending order.

    ``mylist`` is an alias for the caller's list, so the caller sees the
    sorted order without anything being returned.
    """
    # Slice assignment replaces the contents of the same list object.
    mylist[:] = sorted(mylist)
myvals = []
while True:  ## an indefinite loop, continues until the user types stop
    newval = raw_input("Enter a value (stop to end) => ")
    if newval == 'stop':
        break
    ## a string is passed by value, so the capitalized result must be
    ## taken from the return value
    myvals.append(cap_value(newval))
arrange_values(myvals)  ## pass by alias, the list is modified in the function without a return
print(myvals)
Lecture 11¶
Module: lec11_examples
— Examples programs from Lecture 11¶
Code:
""" Example functions from Lecture 11 on if statements
"""
import math
def is_intersect(x0, y0, r0, x1, y1, r1):
    """Return True if two circles touch or overlap, False otherwise.

    Circle 1 is at (x0, y0) with radius r0.
    Circle 2 is at (x1, y1) with radius r1.

    The test is that the distance between the centers is at most the sum
    of the radii, so a circle lying entirely inside the other one also
    counts as intersecting.
    """
    center_distance = math.sqrt((x0 - x1) ** 2 + (y0 - y1) ** 2)
    return center_distance <= r0 + r1
def find_bigger_semester(sem1, sem2):
    """Return whichever of two semesters comes later.

    sem1, sem2: (semester, year) tuples such as ('Fall', 2013).  Within
        one year only 'Fall' and 'Spring' are ranked, with Fall later.

    Returns one of the two tuples passed in.  When neither is later
    (same year, and not one Fall against one Spring) sem1 is returned.
    """
    s1, y1 = sem1
    s2, y2 = sem2
    # Different years decide the answer on their own.
    if y1 != y2:
        return sem1 if y1 > y2 else sem2
    if s1 == 'Fall' and s2 == 'Spring':
        return sem1
    if s2 == 'Fall' and s1 == 'Spring':
        return sem2
    # They are the same; return the whole tuple (not just its semester
    # name) so every branch gives the caller the same kind of value.
    return sem1
"""
Code to compare three values, a,b,c
"""
def find_ordering(a, b, c):
    """Print the names a, b, c in increasing order of their values.

    The smallest value is found first; the other two are then compared
    with each other.  Ties go to the name that is tested first.
    """
    smallest = min(a, b, c)
    if a == smallest:
        order = "a, b, c" if b <= c else "a, c, b"
    elif b == smallest:
        order = "b, a, c" if a <= c else "b, c, a"
    else:  ## c is the min value
        order = "c, a, b" if a <= b else "c, b, a"
    print(order)
def find_ordering2(a, b, c):
    """Print the names a, b, c in increasing order of their values.

    The six possible orderings (abc, acb, bac, bca, cab, cba) are tested
    one after another and the first that matches is printed.
    """
    ## chained comparisons are Python syntax, would not work in all languages
    if a <= b <= c:
        label = "a, b, c"
    elif a <= c <= b:
        label = "a, c, b"
    elif b <= a <= c:
        label = "b, a, c"
    elif b <= c <= a:
        label = "b, c, a"
    elif c <= a <= b:
        label = "c, a, b"
    else:
        label = "c, b, a"
    print(label)
# Spring 2014 falls in the later year, so that tuple is printed.
later_semester = find_bigger_semester(('Fall', 2013), ('Spring', 2014))
print(later_semester)
""" This program illustrates the use of randomness in programs
A drunk is in a platform and moves randomly left or right
(half the time to the left, and half the time to the right)
We simulate this until the drunk falls off the platform
We also illustrate program structure
Pseudo code:
Read the length of a platform
Put the drunk in the middle of the platform
Print the platform with the man in it
while the drunk is still in the platform:
randomly move the drunk
print the platform
"""
### import statements
import random
### function definitions here
def print_platform(iteration, location, length):
    """Print one line showing where the walker stands on the platform.

    The line is the iteration number right-aligned in four columns, a
    space, then ``length`` cells: "X" at cell ``location`` (counted from
    1) and "-" everywhere else.
    """
    cells_left = location - 1
    cells_right = length - location
    row = ("%4d " % iteration) + "-" * cells_left + "X" + "-" * cells_right
    print(row)
    #raw_input(" <enter> ")
### The part below is not executed when we import this program
### but it will execute when we run the program
if __name__ == "__main__":
    n = int(raw_input("Enter length of platform => "))
    # Start in the middle; positions 1..n are on the platform.
    loc = n // 2
    iteration = 1
    while 0 < loc <= n:
        print_platform(iteration, loc, n)
        # A draw above 0.5 moves one cell right, anything else one left.
        step = 1 if random.random() > 0.5 else -1
        loc += step
        iteration += 1
Lecture 12¶
Module: lec12_examples
— Examples programs from Lecture 12¶
Code:
""" Finds the closest points of a list by finding
the pair with the smallest distance
"""
import math
def find_distance(point1, point2):
    """Return the straight-line distance between two points.

    point1, point2 are (x, y) tuples.
    """
    x1, y1 = point1
    x2, y2 = point2
    dx = x2 - x1
    dy = y2 - y1
    return math.sqrt(dy ** 2 + dx ** 2)
if __name__ == '__main__':
    # Collection of coordinate points, shown to the user first
    points = [(1, 5), (13.5, 9), (10, 5), (8, 2), (16, 3)]
    print('All points: %s' % (points,))
    # Start from the first two points as the best pair seen so far, so
    # there is always a real distance to compare against.
    closest_points = (points[0], points[1])
    min_distance = find_distance(*closest_points)
    # Compare every point with every other point.
    for p in points:
        for q in points:
            if p == q:
                # Nothing to measure between a point and itself.
                continue
            dist = find_distance(p, q)
            if dist < min_distance:
                # A closer pair: remember both the distance and the pair.
                min_distance, closest_points = dist, (p, q)
    # min_distance and closest_points now hold the answer.
    print("\nMinimum distance is: %.2f between points %s"
          % (min_distance, closest_points))
## You can modify the for loop to avoid comparing the same
## two coordinates more than once by using ranges. Try it!
""" Practice using ranges to iterate through a list.
Now you have an index to find your place!
"""
if __name__ == '__main__':
    planets = ['Mercury', 'Venus', 'Earth',
               'Mars', 'Jupiter', 'Saturn', 'Uranus',
               'Neptune', 'Pluto']
    # Pair every planet with its position, counted from 1.
    numbered = list(enumerate(planets, 1))
    print("PLANETS GOING FORWARD:")
    for number, planet in numbered:
        print("%d: %s" % (number, planet))
    print("\nPLANETS GOING BACKWARD:")
    for number, planet in reversed(numbered):
        print("%d: %s" % (number, planet))
    print("\nEVERY OTHER PLANET:")
    for number, planet in numbered[::2]:
        print("%d: %s" % (number, planet))
    ## New example:
    ## Print each planet between its two neighbors
    print("\nNEIGHBORS")
    # The first and last planets have only one neighbor, so they appear
    # only at the edges of a triple, never in the middle.
    for before, planet, after in zip(planets, planets[1:], planets[2:]):
        print("%s >>>> %s >>>> %s" % (before, planet, after))
""" Compares the weights of two rats and finds the
first day on which Rat 1 is heavier than Rat 2
"""
# Weights of two rats
L1 = [3.7, 4.1, 4.9, 5.4, 6.0]
L2 = [4.5, 4.7, 5.1, 5.3, 5.9]
# Walk both lists together; pairing stops at the end of the shorter one.
# Days are numbered from 0.
for i, (weight1, weight2) in enumerate(zip(L1, L2)):
    if weight1 > weight2:
        print("On day %d, Rat 1 (%.1f) is heavier than Rat 2 (%.1f)"
              % (i, weight1, weight2))
        break  # Stops the loop on the first occurrence
print("\nOUTSIDE THE LOOP")
Lecture 13¶
Module: lec13_examples
— Examples programs from Lecture 13¶
Code:
""" Example of reading and writing a file.
Remember:
1. when reading lines, the newline at the end of the line is also read
2. when writing lines, you must explicitly add a newline
"""
if __name__ == "__main__":
    # "with" closes each file when its block ends, including the second
    # handle that was previously left open.
    with open("census.txt") as f:
        line1 = f.readline()  ## read only one line
        line2 = f.read()  ## read the rest of the file as a single string
        line3 = f.readline()  ## this should return empty string (end of file already)
    with open("census.txt") as f:
        line4 = f.readline()  ## we are back to the beginning of the file
    for label, text in (("line1", line1), ("line2", line2),
                        ("line3", line3), ("line4", line4)):
        print("%s %s" % (label, text))
    with open("census_out.txt", "w") as fout:
        ## example writing to file; the lines still carry their own
        ## newlines, so none are added here
        for text in (line1, line2, line3, line4):
            fout.write(text)
""" Reads each line of the census data, splits
and finds all information relevant to Albany.
"""
if __name__ == "__main__":
    # The file is closed automatically once the loop has finished.
    with open("census_data.txt") as f:
        for line in f:
            # Fields are tab-separated; the first is matched against
            # 'Albany' and the second is reported as the population.
            m = line.strip().split("\t")
            if m[0].startswith('Albany'):
                print("%s: Population: %s" % (m[0], m[1]))
""" Write a program to read scores.txt
each line containing a score, and then
print scores in decreasing order (with
index on the left.)
Algorithm:
Read the file into a list
Sort the list (reverse)
Print the list
"""
if __name__ == "__main__":
    # Read one integer score per line.  Blank lines (such as a trailing
    # empty line) are skipped instead of crashing int().
    with open("scores.txt") as f:
        nums = [int(line) for line in f if line.strip()]
    nums.sort(reverse=True)
    # Index on the left, counted from 0, then the score.
    for i, num in enumerate(nums):
        print("%d: %d" % (i, num))
""" This program illustrates how to read a line on the web as a file.
Simply use the urllib.urlopen method instead of the open method of a file.
Reading is identical to the one for files.
"""
import urllib
def print_line(line):
    """Print ``line``; print adds its own newline after it."""
    print(line)


if __name__ == "__main__":
    f = urllib.urlopen("http://www.cs.rpi.edu/~sibel/csci1100/fall2015/_sources/course_notes/lec13_files_web.txt")
    try:
        # Show only the first ten lines of the page.
        for i, line in enumerate(f, 1):
            print(line.strip())
            if i == 10:
                break
    finally:
        # Release the connection even if reading fails part-way.
        f.close()
Lecture 14¶
Module: lec14_examples
— Examples programs from Lecture 14¶
Code:
""" Simple program for reading through a file (in this case the IMDB file
containing the list of actors, movies and years) and finding all the
movies by an actor. The file can be found under Resources in Piazza.
Note. The file is very large, 200K+ lines.
"""
if __name__ == "__main__":
    name = raw_input("Enter an actor => ")
    # The file is closed automatically when the loop is done.
    with open("imdb_data.txt") as f:
        for line in f:
            # Fields are separated by "|": actor first, movie second.
            m = line.strip().split("|")
            if m[0].strip() == name:
                print(m[1].strip())
""" This program illustrates the use of sets to find the number of
actors in the data file "imdb.txt". In particular, the use of sets
here is a big efficiency save, as checking
value in set
is significantly faster than checking value in list.
"""
if __name__ == "__main__":
    actors = set()
    # The file is closed automatically when the loop is done.
    with open("imdb_data.txt") as f:
        for lineno, line in enumerate(f, 1):
            ## This is just to illustrate how quickly the program is running
            if lineno % 1000 == 0:
                print("Line no %d" % lineno)
            # Only the actor (first "|"-separated field) is needed; the
            # movie and year fields are no longer parsed.
            actors.add(line.split("|")[0].strip())
    print("%d actors are found" % len(actors))
Lecture 15¶
Module: lec15_examples
— Examples programs from Lecture 15¶
Code:
"""Use hanks.txt, find for each actor
how many movies they have starred in.
This solution does not use dictionaries
We are looking at complexity of solutions:
Solution 1: Uses a list of lists for each actor
nummovies = [ [actor, numberofmovies], ...]
Algorithm:
for each line in the movie file:
find the index of the list that has
the current actor
add 1 to the number of movies
Analysis:
Searching if an actor is in the list may require
checking each entry in the list, (linear time O(n))
This list look up is repeated for each line in the movie
If m lines in the movie, total time roughly: O(n*m)
Note that number of actors is similar to number of lines
as each movie has few actors, so if n=m, we get O(n^2),
a quadratic solution.
Check to see that this is a very slow program.
"""
def parse_line(line):
    """Split one "actor|movie|year" line into its three parts.

    Returns (actor, movie, year): actor and movie are strings without
    surrounding whitespace, year is an int.
    """
    fields = line.strip().split("|")
    return fields[0].strip(), fields[1].strip(), int(fields[2])
def find_actor(nummovies, a):
    """Return the index of actor ``a`` in ``nummovies``, adding it if new.

    nummovies: list of [actor, count] lists.  It is searched from the
        front; when ``a`` is missing, [a, 0] is appended.
    Returns the index of the existing or newly added entry.
    """
    for position, entry in enumerate(nummovies):
        if entry[0] == a:
            return position
    nummovies.append([a, 0])
    return len(nummovies) - 1
if __name__ == "__main__":
    nummovies = []
    ## open the file; "with" closes it once every line has been read
    with open("hanks.txt") as f:
        ## read line by line
        for i, line in enumerate(f, 1):
            ##debugging code to see the progress of the program
            if i % 10000 == 0:
                print("Line %d" % i)
            ##find actor, movie, year
            a, m, y = parse_line(line)
            ## find actor index (a new actor is added with zero movies)
            idx = find_actor(nummovies, a)
            ## add 1 for movies
            nummovies[idx][1] += 1
    name = raw_input("Enter an actor => ")
    # An unknown name is added with a count of 0 by find_actor, so the
    # report below then says "0 movies".
    idx = find_actor(nummovies, name)
    print("%s starred in %d movies" % (name, nummovies[idx][1]))
"""Use hanks.txt, find for each actor
how many movies they have starred in.
This is a new solution that uses dictionaries
actors: dictionary
key: name of actor
value: number of movies by this actor
for each line in the movie file:
if actor is not in keys, add him/her with zero movies
add 1 to the number of movies for this actor
Complexity analysis:
Finding if an actor is in the key of a dictionary (actor in actors)
is constant time, does not depend on the number of actors in the
dictionary. So, O(1)
We repeat this for each line of the file, so for n lines we get
O(1*n) = O(n) algorithm. Compare with the lec15_ex1.py. This is a
much cheaper algorithm. It will run much faster.
"""
def parse_line(line):
    """Break an "actor|movie|year" record into (actor, movie, year).

    Whitespace around the actor and movie is removed and the year is
    converted to an int.
    """
    pieces = line.strip().split("|")
    actor = pieces[0].strip()
    movie = pieces[1].strip()
    return actor, movie, int(pieces[2])
if __name__ == "__main__":
    actors = {}
    ## open the file; "with" closes it once every line has been read
    with open("imdb_data.txt") as f:
        for i, line in enumerate(f, 1):
            ##debugging code to see the progress of the program
            if i % 10000 == 0:
                print("Line %d" % i)
            a, m, y = parse_line(line)
            ## a missing actor counts as 0 so far, so the first sighting
            ## stores 1 and later ones add to the stored count
            actors[a] = actors.get(a, 0) + 1
    name = raw_input("Enter an actor => ")
    if name in actors:  ## dictionary key look up
        print("%s starred in %d movies" % (name, actors[name]))
"""Use hanks.txt, find for each actor
how many movies they have starred in,
also print the movies.
This is a slight variation on lec15_ex2.py
We will still use a dictionary, but also store
the set of movies for each actor as value.
actors: dictionary
key: name of actor
value: set of movies by this actor
for each line in the movie file:
if actor is not in keys, add him/her with zero movies
add 1 to the number of movies for this actor
Complexity:
Finding a key in dictionary: O(1)
Adding a value to a set: O(1)
We repeat both for each line of the file, if n lines
total complexity is still O(n)
"""
def parse_line(line):
    """Return (actor, movie, year) read from an "actor|movie|year" line.

    The actor and movie come back stripped of surrounding whitespace;
    the year comes back as an int.
    """
    record = line.strip().split("|")
    actor, movie = [record[index].strip() for index in (0, 1)]
    year = int(record[2])
    return actor, movie, year
if __name__ == "__main__":
    actors = {}
    ## open the file; "with" closes it once every line has been read
    with open("imdb_data.txt") as f:
        ##read line by line
        for line in f:
            a, m, y = parse_line(line)
            ## start an empty set the first time an actor is seen, then
            ## add the movie to that actor's set
            actors.setdefault(a, set()).add(m)
    name = raw_input("Enter an actor => ")
    if name in actors:  ## dictionary key look up
        print("%s starred in %d movies" % (name, len(actors[name])))
        print("Movies")
        for m in actors[name]:
            print(m)
Lecture 16¶
Module: lec16_examples
— Examples programs from Lecture 16¶
Code:
"""
Time Complexity review:
Constant time, O(1)
does not depend on the size of the data
(list/dictionary/set)
value in set
key in dictionary
append to a list
add a value to a set
Linear time, O(n)
depends linearly on the data, twice the
size of the data, twice the length of time the program will take
value in list
Quadratic time, O(n^2)
Some sort of double loop, scales quadratically,
twice the data, 4 times slower for example
Double loops over n items are quadratic. Example:
for i in range(n):
for j in range(n):
print i,j
This is O(n^2) complexity
Worse stuff:
O(n^3): involves triple loop
Lots of matrix operations are cubed complexity and
are very costly
Space complexity:
There is an equivalent notion of space complexity, how much
data you keep in memory. For fast programs, it is equally important
not to keep unnecessary data or make multiple copies. You will
see this in future classes.
"""
""" Examples of more complex dictionaries
in which value can be different things.
This program shows how to take one dictionary
with set of values as keys, and create a
new dictionary where the keys are values from the
first dictionary.
"""
if __name__ == "__main__":
    ### dictionary where
    ### key: name
    ### value: set of hobbies for that person
    characters = {}
    characters['Gru'] = set(['World domination', 'Dancing'])
    characters['Minion'] = set(['Floating', 'Dancing'])
    characters['Margo'] = set(['Dancing', 'World domination'])
    ## print each person with their hobbies on one line, both sorted
    for person in sorted(characters):
        hobby_text = "".join("%s, " % hobby
                             for hobby in sorted(characters[person]))
        line = "%s: %s" % (person.capitalize(), hobby_text)
        # Drop the trailing ", " left after the last hobby.
        print(line.strip().strip(","))
    ## we will create the reverse dictionary
    ### key: a hobby from characters dictionary
    ### value: set of names with that hobby
    hobbies = {}
    for name in characters:
        for hobby in characters[name]:
            # Start an empty set for a new hobby, then record the name.
            hobbies.setdefault(hobby, set()).add(name)
    print(hobbies)
""" Example of using an API that returns a JSON
object.
To load a string containing a json object into a Python object
use json.loads()
>>> x = json.loads('{"a": 1, "b": 2}')
>>> x
{u'a': 1, u'b': 2}
Note: u'a' means that 'a' is a string encoded in Unicode
The reverse operation will take a Python object, and create
JSON string representation of it.
>>> json.dumps([ {'a':1, 'b':2}, [1,2] ])
'[{"a": 1, "b": 2}, [1, 2]]'
"""
import urllib
import json
if __name__ == "__main__":
    ## Find the bounding box of an address, in this case Troy, NY
    url = ("http://nominatim.openstreetmap.org/search?q=Troy, NY&format=json&"
           "polygon_geojson=1&addressdetails=0")
    # json.load reads the whole response and parses it in one call.
    content = json.load(urllib.urlopen(url))
    print(content[0]['boundingbox'])
    ### Find photos in Troy, NY given the bounding box of
    ### latitude and longitude
    # Substituted in the order minx, miny, maxx, maxy.
    bounds = ('-73.8517851', '42.5684117', '-73.5317851', '42.8884117')
    url2 = ("http://www.panoramio.com/map/get_panoramas.php?set=public&"
            "from=0&to=5&minx=%s&miny=%s&maxx=%s&maxy=%s&size=medium&mapfilter=true"
            % bounds)
    content = json.load(urllib.urlopen(url2))
    for photo in content['photos']:
        print(photo['photo_url'])
""" Create two dictionaries from IMDB for easy look up
and ask for an actor or movie name repeatedly and
print relevant info
Dictionaries:
actors: key: name, value: set of movies
movies: key: movie, value: set of actors in that movie
"""
def read_values():
    """ Read imdb_data.txt and build two lookup dictionaries.

    Every non-blank line is split on '|'; the first field is used as
    the actor name and the second as the movie title, both with the
    surrounding whitespace removed. Blank lines are skipped. A
    non-blank line with fewer than two fields raises IndexError.

    Returns the pair (actors, movies):
        actors: key: name, value: set of movies
        movies: key: movie, value: set of actors in that movie
    """
    actors = {}
    movies = {}
    ## the with statement closes the file even if a line fails to parse
    with open("imdb_data.txt") as infile:
        for line in infile:
            if not line.strip():
                continue  ## tolerate blank lines, e.g. at the end of the file
            m = line.strip().split("|")
            actor = m[0].strip()
            movie = m[1].strip()
            ## setdefault creates the empty set the first time a key is seen
            actors.setdefault(actor, set()).add(movie)
            movies.setdefault(movie, set()).add(actor)
    return actors, movies
if __name__ == "__main__":
    actors, movies = read_values()
    ## each command maps to (prompt, dictionary to search, found message)
    searches = {
        '1': ("Actor name => ", actors, "Actor %s found"),
        '2': ("Movie name => ", movies, "Movie %s found"),
    }
    while True:
        cmd = raw_input("1 to search actor, 2 to search movie => ")
        if cmd not in searches:
            break  ## anything other than 1 or 2 ends the program
        prompt, table, message = searches[cmd]
        key = raw_input(prompt)
        if key in table:
            print(message % key)
            for entry in table[key]:
                print(entry)
        print("")  ## blank line between searches
Lecture 17¶
Module: lec17_examples
— Example programs from Lecture 17¶
Code:
""" This program illustrates the use of dictionaries
and sets to compute degree of Kevin Bacon up to 2.
As a challenge, figure out how to make the degree
function work for any degree and loop to find the
next degree until no new actors found.
Step 1:
Create two dictionaries from IMDB for easy look up
and ask for an actor or movie name repeatedly and
print relevant info
Dictionaries:
actors: key: name, value: set of movies
movies: key: movie, value: set of actors in that movie
Step 2: Ask for an actor
1. If the actor is Kevin Bacon, return degree 0
2. If the actor is in a movie with Kevin Bacon, return 1
3. If the actor is in a movie with a degree 1 person (but
is not Kevin Bacon or degree 1 actor), return 2
Otherwise, return nothing (for now).
"""
def read_values():
    """ Read imdb_data.txt and build two lookup dictionaries.

    Every non-blank line is split on '|'; the first field is used as
    the actor name and the second as the movie title, both with the
    surrounding whitespace removed. Blank lines are skipped. A
    non-blank line with fewer than two fields raises IndexError.

    Returns the pair (actors, movies):
        actors: key: name, value: set of movies
        movies: key: movie, value: set of actors in that movie
    """
    actors = {}
    movies = {}
    ## the with statement closes the file even if a line fails to parse
    with open("imdb_data.txt") as infile:
        for line in infile:
            if not line.strip():
                continue  ## tolerate blank lines, e.g. at the end of the file
            m = line.strip().split("|")
            actor = m[0].strip()
            movie = m[1].strip()
            ## setdefault creates the empty set the first time a key is seen
            actors.setdefault(actor, set()).add(movie)
            movies.setdefault(movie, set()).add(actor)
    return actors, movies
def find_degree(inputactor, actors, movies):
    """ Return the degree of Kevin Bacon (0, 1 or 2) of inputactor.

    actors: key: name, value: set of movies
    movies: key: movie, value: set of actors in that movie

    Returns None when the actor is not within two steps of
    'Bacon, Kevin'. Raises KeyError if 'Bacon, Kevin' (or any name or
    title met on the way) is missing from the dictionaries.
    """
    center = set(['Bacon, Kevin'])
    if inputactor in center:
        return 0

    ## degree 1: everybody who shares a movie with Kevin Bacon
    costars = set([])
    for title in actors['Bacon, Kevin']:
        costars |= movies[title]
    costars = costars - center
    if inputactor in costars:
        return 1

    ## degree 2: first collect all movies of the degree 1 actors ...
    costar_movies = set([])
    for name in costars:
        costar_movies |= actors[name]
    ## ... then everybody appearing in any of those movies
    second_ring = set([])
    for title in costar_movies:
        second_ring |= movies[title]
    ## drop the people already counted at degree 0 or 1
    second_ring = second_ring - (costars | center)
    if inputactor in second_ring:
        return 2
    return None
if __name__ == "__main__":
    actors, movies = read_values()
    ## keep asking until the user types stop
    actor = raw_input("Actor name (stop to end) => ")
    while actor != 'stop':
        if actor in actors:
            deg = find_degree(actor, actors, movies)
            if deg is None:
                ## find_degree only knows about degrees 0, 1 and 2
                print("Degree is above 2")
            else:
                print(" ".join(["Degree is", str(deg)]))
        else:
            print("Actor not found")
        actor = raw_input("Actor name (stop to end) => ")
""" Class example, a simple class of 2d objects
"""
class Point2d(object):
    """ A point in the plane with attributes x and y. """

    def __init__(self, x0, y0):
        """Initialize to make sure each point has an x, y value. """
        self.x = x0
        self.y = y0

    def length(self):
        """ Return the distance of the point from the origin (0, 0). """
        return (self.x**2 + self.y**2)**(0.5)

    def __str__(self):
        """ Returns the string representation of object.
            Call as:
                str(x)
                print x  ##calls this function and prints the result string
        """
        ## %s (not %d) so that float coordinates are not truncated
        return "(%s, %s)" %(self.x, self.y)

    def distance(self, other):
        """ Returns the distance between two points. """
        d = (self.x-other.x)**2 + (self.y-other.y)**2
        return d**(0.5)

    def __add__(self, other):
        """ Adds two points and returns a new point with the
            addition of values. You can call this as:
                pt1.__add__(pt2)
                pt1+pt2
            Neither input point is changed.
        """
        return Point2d(self.x + other.x, self.y + other.y)

    def __sub__(self, other):
        """ Subtracts other from self, and returns a new point
            containing the result. You can call this as:
                pt1.__sub__(pt2)
                pt1-pt2
            Neither input point is changed.
        """
        return Point2d(self.x - other.x, self.y - other.y)
if __name__ == "__main__":
    ##Test code here
    def show(*parts):
        """ Print the parts separated by single spaces. """
        print(" ".join([str(part) for part in parts]))

    pt1 = Point2d(10, 20)
    pt2 = Point2d(3, 4)
    for pt in (pt1, pt2):
        show(pt.x, pt.y)
    print(pt1)       ## calls the __str__ method
    print(str(pt1))  ## this is identical to the above call
    show("Length of pt1 is", pt1.length())
    show("Length of pt2 is", pt2.length())
    show("Distance between", pt1, "and", pt2, "is:", pt1.distance(pt2))
    print(pt1)       ## calls str
    pt3 = pt1+pt2
    print(pt3)
    show("Subtraction:", pt1-pt2)
    show("Pt1:", pt1, "Pt2:", pt2)
    print("Add/Subtract do not change the input objects")
Lecture 18¶
Module: lec18_examples
— Example programs from Lecture 18¶
Code:
""" Class for storing time.
"""
class Time(object):
    """ A time of day, stored internally as seconds since midnight.

    The stored value always lies in the range 0 .. 24*3600-1, so
    24:00:00 and any overflow of hours, minutes or seconds wrap
    around into the next day.
    """
    SECONDS_PER_DAY = 24*60*60

    def __init__(self, hr, min, sec):
        """Store time internally as seconds. """
        total = hr*60*60 + min*60 + sec
        ## modulo keeps the value inside one day (also for negatives)
        self.seconds = total % Time.SECONDS_PER_DAY

    def convert(self):
        """Convert seconds to hour, minute and seconds """
        hr, rest = divmod(self.seconds, 3600)
        minute, sec = divmod(rest, 60)
        return hr, minute, sec

    def __str__(self):
        """Print time as military time. """
        return '%02d:%02d:%02d' % self.convert()

    def __add__(self, other):
        """Add two time objects and return a new time object. """
        ## the constructor wraps a sum that passes midnight
        return Time(0, 0, self.seconds + other.seconds)

    def __sub__(self, other):
        """ Subtract one time object from another. If negative,
            assume time is in the previous day.
        """
        ## the constructor turns a negative difference into the
        ## matching time of the previous day
        return Time(0, 0, self.seconds - other.seconds)

    def am_or_pm(self):
        """ Is Time before or after 12:00:00 """
        if self.seconds < 43200:
            return "AM"
        else:
            return "PM"
if __name__ == "__main__":
    time1 = Time(5,5,5)
    time2 = Time(12,0,0)
    ## sum, difference and the AM/PM label of both times
    results = [str(time1+time2), str(time2-time1),
               time2.am_or_pm(), time1.am_or_pm()]
    for text in results:
        print(text)
""" Class example, this file contains two classes:
Point1d for 1 dimensional objects
Point2d for 2 dimensional objects
Distinguish between a class and a file it is saved in
This file name: points.py
Class name: Point1d, Point2d
When importing into another program you can either use:
1.
import points
x = points.Point2d(5,10)
2.
from points import Point2d
x = Point2d(5,10)
"""
class Point1d(object):
    """ A named point on a line, ordered by its x value. """

    def __init__(self, x0, name):
        """ Remember the position x0 and the name of the point. """
        self.name = name
        self.x = x0

    def __str__(self):
        """ Return the point as 'name: x'. """
        label, position = self.name, self.x
        return "%s: %d" %(label, position)

    def __lt__(self, other):
        """ A point is smaller than another if its x value is smaller. """
        mine, theirs = self.x, other.x
        return mine < theirs
class Point2d(object):
    """ A point in the plane with attributes x and y.

    For haversine_distance, x is read as the longitude and y as
    the latitude, both in degrees.
    """

    def __init__(self, x0, y0):
        """Initialize to make sure each point has an x, y value. """
        self.x = x0
        self.y = y0

    def length(self):
        """ Return the distance of the point from the origin (0, 0). """
        return (self.x**2 + self.y**2)**(0.5)

    def __str__(self):
        """ Returns the string representation of object.
            Call as:
                str(x)
                print x  ##calls this function and prints the result string
        """
        ## %s (not %d) so that float coordinates such as latitude and
        ## longitude are not truncated to whole numbers
        return "(%s, %s)" %(self.x, self.y)

    def distance(self, other):
        """ Returns the distance between two points. """
        d = (self.x-other.x)**2 + (self.y-other.y)**2
        return d**(0.5)

    def __add__(self, other):
        """ Adds two points and returns a new point with the
            addition of values. You can call this as:
                pt1.__add__(pt2)
                pt1+pt2
        """
        return Point2d(self.x + other.x, self.y + other.y)

    def haversine_distance(self, other):
        """ Haversine distance, in miles between two locations
            with their latitude and longitude.
        """
        import math
        lat1 = self.y * math.pi / 180.0
        long1 = self.x * math.pi / 180.0
        lat2 = other.y * math.pi / 180.0
        long2 = other.x * math.pi / 180.0
        # Now the real work.
        dlat = (lat1-lat2)
        dlong = (long1-long2)
        a = math.sin(dlat/2)**2 + \
            math.cos(lat1) * math.cos(lat2) * math.sin(dlong/2)**2
        ## rounding may push a slightly outside [0, 1], which would
        ## make the square roots below fail
        a = min(1.0, max(0.0, a))
        c = 2*math.atan2( math.sqrt(a), math.sqrt(1-a) )
        R = 6371 / 1.609  ## 6371 divided by the km-per-mile factor
        return R*c

    def __sub__(self, other):
        """ Subtracts other from self, and returns a new point
            containing the result. You can call this as:
                pt1.__sub__(pt2)
                pt1-pt2
        """
        return Point2d( self.x-other.x, self.y-other.y )

    def __eq__ (self, other):
        """ Two points are equal when both coordinates are equal.
            Objects without x and y attributes are never equal to a point.
        """
        try:
            return self.x == other.x \
                and self.y == other.y
        except AttributeError:
            return NotImplemented

    def __ne__ (self, other):
        """ Opposite of ==; Python 2 does not derive it from __eq__. """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
if __name__ == "__main__":
    ##Test code here
    def show(*parts):
        """ Print the parts separated by single spaces. """
        print(" ".join([str(part) for part in parts]))

    pt1 = Point2d(10, 20)
    pt2 = Point2d(3, 4)
    for pt in (pt1, pt2):
        show(pt.x, pt.y)
    print(pt1)       ## calls the __str__ method
    print(str(pt1))  ## this is identical to the above call
    show("Length of pt1 is", pt1.length())
    show("Length of pt2 is", pt2.length())
    show("Distance between", pt1, "and", pt2, "is:", pt1.distance(pt2))
    print(pt1)       ## calls str
    pt3 = pt1+pt2
    print(pt3)
    show("Subtraction:", pt1-pt2)
    show("Pt1:", pt1, "Pt2:", pt2)
    print("Add/Subtract do not change the input objects")
    ## points built from (longitude, latitude) pairs
    pttroy = Point2d(-73.69, 42.73)
    ptalbany = Point2d(-73.72, 42.70)
    show("Distance between Troy and Albany", pttroy.haversine_distance(ptalbany))
"""Illustrates the use of classes both defined in the same file
as in Business and also imported from a different file, as in
Point2d.
"""
from points import Point2d
class Business(object):
    """ A business with a name, location, address, url, category
        and a list of integer review scores.
    """

    def __init__(self, name, lat, lon, address, url, category, scores):
        """ Store the values of one business.

            lat and lon are converted with float() and stored as the
            Point2d self.loc (x is the longitude, y the latitude).
            Every '+' in address becomes a newline followed by a tab.
            scores is converted to a new list of integers; the list
            passed in by the caller is left untouched.
        """
        self.name = name
        self.loc = Point2d(float(lon), float(lat))
        self.address = address.replace('+','\n\t')
        self.url = url
        self.category = category
        ## convert scores into integer, without changing the input list
        self.scores = [int(score) for score in scores]

    def avgscore(self):
        """ Return the average score as a float.

            With three or more scores the smallest and the largest
            score are left out of the average. With no scores at
            all, 0.0 is returned.
        """
        count = len(self.scores)
        if count == 0:
            return 0.0  ## avoid dividing by zero
        if count < 3:
            return sum(self.scores)/float(count)
        ## sorted() works on a copy: we do not want to change the
        ## actual ordering of scores
        trimmed = sorted(self.scores)[1:-1]
        return sum(trimmed)/float(len(trimmed))

    def __str__(self):
        """ Return name, address, location and average score as text. """
        return "%s\n\t%s\n\tLocation: %s\nAvg Score: %.1f" \
               %(self.name, self.address, self.loc, self.avgscore())
if __name__ == "__main__":
    ## Create a list of business objects, one for each non-blank
    ## line; the with statement closes the file when we are done
    businesses = []
    with open('yelp.txt') as f:
        for line in f:
            if not line.strip():
                continue  ## skip blank lines instead of failing on m[1]
            m = line.strip().split("|")
            ## the first six fields are fixed, the rest are the scores
            b = Business(m[0],m[1],m[2],m[3],m[4],m[5],m[6:])
            businesses.append(b)

    finished = False
    while not finished:
        name = raw_input("Enter a business (stop to end) => ")
        if name == 'stop':
            finished = True
        else:
            for b in businesses:
                if b.name == name:
                    print(b)
                    break  ##no need to continue checking with other businesses
            else:
                ## the loop ended without break: no business matched
                print("Business is not found")
Lecture 19¶
Module: lec19_examples
— Example programs from Lecture 19¶
Code:
""" This module shows different programming styles
for different basic problems.
Note: short circuited if statements
c1 and c2: if c1 is false, no need to check c2
c1 or c2: if c1 is true, no need to check c2
The problem is that you may not even see a runtime
error hiding in c2 until you are forced to evaluate c2
a = 1
b = 'q'
d = 0
if a>1 and 1/0 < 2: ##normally you should get division by zero
print "Hello"
if d<1 or int(b) < 2: ##normally you should get an error for int('q')
print "Hello"
"""
## Pattern 1: return as soon as you find out something
## If you reach the end of the function, then it must be True/False?
def find_valid(word):
    """ Return True as soon as a character of word is one of the
        digits 1 to 5, and False if there is no such character.
    """
    valid_digits = ('1','2','3','4','5')
    for letter in word:
        if letter not in valid_digits:
            continue
        ## first valid digit found, no need to look any further
        return True
    ## every character was checked and none was in the 1-5 range
    return False
## Pattern 2: keep a Boolean of what value you should return
## Note that this is less efficient because you may not need to
## check the rest of word, but you still do in this version.
def find_valid2(word):
    """ Return True if word contains at least one digit between 1 and 5
        False otherwise. This version walks through the whole word
        and remembers the answer in a Boolean.
    """
    found = False
    for letter in word:
        ## once found is True it stays True
        found = found or letter in ['1','2','3','4','5']
    ## found is still False if no character was in the 1-5 range
    return found
## Pattern 1: Now the problem is different. We cannot decide whether
## all digits are between 1 and 5 by looking at a single character,
## but we can check the reverse: whether a digit is not between 1 and 5.
def find_between(word):
    """ Return True if all digits in word are between 1 and 5,
        False otherwise. Characters that are not digits are ignored,
        so a word without any digit gives True.
    """
    for letter in word:
        if letter.isdigit() and letter not in ['1','2','3','4','5']:
            ## one digit outside the range settles the answer
            return False
    return True
## Pattern 2 for the same problem.
def find_between2(word):
    """ Return True if all digits in word are between 1 and 5,
        False otherwise. Characters that are not digits are ignored.
        This version checks the whole word and remembers the
        answer in a Boolean.
    """
    isvalid = True
    for letter in word:
        if letter.isdigit() and letter not in ['1','2','3','4','5']:
            isvalid = False
    return isvalid
if __name__ == "__main__":
    ## This is an example of a while loop that does not use break
    ## but if statements to decide when to end.
    finished = False  ## must start as False, or the loop never runs
    while not finished:
        cmd = raw_input("Enter an integer between 1 and 5 (stop to end) => ")
        if cmd == 'stop':
            finished = True
        elif cmd.isdigit() and 1<=int(cmd)<=5:
            ## int(cmd) is only evaluated when cmd.isdigit() is True
            print("Good value")
        else:
            ## not a number, or a number outside of the range
            print("Please enter an integer between 1 and 5")
""" This program illustrates different patterns for
iterating through a list to keep track of values.
"""
def find_min_year(L):
    """ Given a list L with pairs of year,value, return a year
        with the smallest value. If several years share the smallest
        value, the one that comes first in the list is returned.
        L must not be empty.
    """
    ## start with the first pair, which is always a valid answer
    best_year, best_value = L[0]
    for year, value in L[1:]:
        if value < best_value:
            best_year, best_value = year, value
    return best_year
def find_min_years(L):
    """ Given a list L with pairs of year,value, return the pair
        (list of all years with the smallest value, smallest value).
        The years are in the same order as in L. L must not be empty.
    """
    ## the first pair gives a valid starting point
    smallest = L[0][1]
    years = [ L[0][0] ]
    for year, value in L[1:]:
        if value < smallest:
            ## new minimum: forget the years collected so far
            smallest, years = value, [year]
        elif value == smallest:
            ## same value as the current minimum: remember this year too
            years.append(year)
    return years, smallest
def find_max_years(L):
    """ Given a list L with pairs of year,value, return the pair
        (list of all years with the largest value, largest value).
        Same as find_min_years, but looks for large values.
        L must not be empty.
    """
    ## the first pair gives a valid starting point
    largest = L[0][1]
    years = [ L[0][0] ]
    for year, value in L[1:]:
        if value > largest:
            ## new maximum: forget the years collected so far
            largest, years = value, [year]
        elif value == largest:
            years.append(year)
    return years, largest
def find_allpairs(L):
    """ Return all pairs (L[i], L[j]) with i < j, in order. A value is
        never matched to itself and no pair is returned twice:
            L=[1,2,3]
            return [(1,2), (1,3), (2,3)]
            do not return: (1,1), (2,2), (3,3),
            (2,1)** since (1,2) is already there
            (3,1)** since (1,3) is already there
            (3,2)** since (2,3) is already there
    """
    size = len(L)
    ## j always starts one position after i
    return [ (L[i], L[j]) for i in range(size-1)
                          for j in range(i+1, size) ]
def find_threeletters(word,searchword):
    """ Count how many times the three letter sequence searchword
        appears in word (overlapping matches are all counted).
        We iterate only once over the start positions: the last
        three letters begin at index len(word)-3, so the range has
        to stop at len(word)-2.
    """
    count = 0
    ## for words shorter than three letters the range is empty
    for i in range(len(word)-2):
        if word[i:i+3] == searchword:
            count += 1
    return count
if __name__ == "__main__":
    ## Test code here
    def show(*parts):
        """ Print the parts separated by single spaces. """
        print(" ".join([str(part) for part in parts]))

    ## Temperature and snowfall values for Troy in December,
    ## stored as (year, value) pairs
    snowfall = [
        (1958, 8.6), (1959, 4.6), (1960, 12.0), (1961, 14.5),
        (1962, 14.0), (1963, 19.9), (1964, 8.5), (1965, 2.8),
        (1966, 24.5), (1967, 14.0), (1968, 16.0), (1969, 48.2),
        (1970, 36.2), (1971, 7.0), (1973, 10.0), (1974, 7.0),
        (1975, 13.0), (1976, 6.5), (1977, 9.1), (1978, 14.8),
        (1980, 5.6), (1982, 2.5), (1983, 3.0), (1984, 10.0),
        (1987, 4.0), (1988, 3.8), (1989, 0.5), (1990, 5.0),
        (1991, 4.0),
        (1992, 4.3), (1993, 7.5), (1994, 1.5), (1995, 21.0),
        (1996, 6.5), (1997, 0), (1998, 2.5), (1999, 0),
        (2000, 15.9),
        (2001, 6.5), (2002, 6.2), (2003, 30.1), (2004, 5.0),
        (2005, 6.4), (2006, 1.0), (2007, 17.8), (2008, 16.2),
        (2010, 0.5), (2011, 3.0), (2012, 6.0), (2013, 14.6),
        (2014, 5.8),
    ]
    meantemp = [
        (1956, 32.4), (1957, 34.1), (1958, 20.1), (1959, 31.2),
        (1960, 22.8), (1961, 28.8), (1962, 24.7), (1963, 20.7),
        (1964, 28.1), (1965, 31.7), (1966, 28.5), (1967, 30.7),
        (1968, 24.9), (1969, 24.4), (1970, 25.6), (1971, 32.1),
        (1972, 29.9), (1973, 30.7), (1974, 31.4), (1975, 25.9),
        (1976, 22.4), (1977, 26.6), (1978, 29.8), (1979, 32.5),
        (1980, 20.7), (1981, 29.5), (1982, 35.4), (1983, 26.0),
        (1984, 34.7), (1985, 25.6), (1986, 30.4), (1987, 31.9),
        (1988, 27.4), (1989, 14.4), (1990, 35.0), (1991, 30.1),
        (1992, 29.6), (1993, 28.4), (1994, 32.5), (1995, 24.4),
        (1996, 35.4), (1997, 31.1), (1998, 36.1), (1999, 31.2),
        (2000, 23.4), (2001, 36.4), (2002, 27.4), (2003, 29.3),
        (2004, 28.6), (2005, 27.8), (2006, 37.3), (2007, 28.1),
        (2008, 30.2), (2010, 26.0), (2011, 35.4), (2012, 33.8),
        (2013, 27.9), (2014, 32.8),
    ]
    ## Example DNA sequence
    sequence = "ATCACTGTAGTAGTAGCTGGAAAGAGAAATCTGTGACTCCAATTAGCCA" \
               "GTTCCTGCAGACCTTGTGAGGACTAGAGGAAGAATGCTCCTGGCTGTTT" \
               "TGTACTGCCTGCTGTGGAGTTTCCAGACCTCCGCTGGCCATTTCCCTAG" \
               "AGCCTGTGTCTCCTCTAAGAACCTGATGGAGAAGGAATGCTGTCCACCG" \
               "TGGAGCGGGGACAGGAGTCCCTGTGGCCAGCTTTCAGGCAGAGGTTCC" \
               "TGTCAGAATATCCTTCTGTCCAATGCACCACTTGGGCCTCAATTTCCCTT" \
               "CACAGGGGTGGATGACCGGGAGTCGTGGCCTTCCGTCTTTTATAATAGG" \
               "ACCTGCCAGTGCTCTGGCAACTTCATGGGATTCAACTGTGGAAACTGCAA" \
               "GTTTGGCTTTTGGGGACCAAACTGCACAGAGAGACGACTCTTGGTGAGAA" \
               "GAAACATCTTCGATTTGAGTGCCCCAGAGAAGGACAAATTTTTTGCCTACC" \
               "TCACTTTAGCAAAGCATACCATCAGCTCAGACTATGTCATCCCCATAGGGA" \
               "CCATTGGCCAAATGAAAAATGGATCAACACCCATGTTTAACGACATCAATA" \
               "TTTATGACCTCTTTGTCTGGATGCATTATTATGTGTCAATGGATGCACTGC" \
               "TTGGGGGATCTGAAATCTGGAGAGACATTGATTTTGCCCATGAAGCACCA" \
               "GCTTTTCTGCCTTGGCATAGACTCTTCTTGTTGCGGTGGGAACAAGAAATC" \
               "CAGAAGCTGACAGGAGATGAAAACTTCACTATTCCATATTGGGACTGGCG" \
               "GGATGCAGAAAAGTGTGACATTTGCACAGATGAGTACATGGG"

    ## the same two reports for the smallest and the largest values
    for title, finder in [("Min", find_min_years), ("Max", find_max_years)]:
        print(title)
        show("Snowfall", finder(snowfall))
        show("Mean temperature", finder(meantemp))
    show("All pairs for [1,2,3,4]",
         find_allpairs([1,2,3,4]))
    show("ACA", find_threeletters(sequence, 'ACA'))
from PIL import Image
def copy_image(fname,copytype):
    """ Copy the image stored in file fname, pixel by pixel, into a
        new RGB image of the same size and return the new image.
        copytype selects where each pixel goes:
            same:  same position
            right: column w-i-1 (first column becomes the last)
            down:  row h-j-1 (first row becomes the last)
        For any other copytype no pixel is copied, so the new image
        keeps its white fill.
        add your versions!
    """
    source = Image.open(fname)
    source_pixels = source.load()   ##pixel access for the original image
    width, height = source.size
    ## blank (white) image of the same size, and its pixel access
    result = Image.new("RGB", (width, height), "white")
    result_pixels = result.load()

    ##now copy the pixels from one image to the next
    for col in range(width):
        for row in range(height):
            if copytype == 'same':
                target = (col, row)
            elif copytype == 'right':
                target = (width-col-1, row)
            elif copytype == 'down':
                target = (col, height-row-1)
            else:
                continue   ## unknown copytype: leave the pixel white
            result_pixels[target] = source_pixels[col, row]
    ##return the new image
    return result
if __name__ == "__main__":
    ## show one copy of the image for every copy type
    for copytype in ('same', 'right', 'down'):
        newim = copy_image('bolt.jpg', copytype)
        newim.show()
Lecture 20¶
Module: lec20_examples
— Example programs from Lecture 20¶
Code:
"""
Problem: Find the index of the two smallest values
We will also learn how to time running time of algorithms
using the time module
Algorithm:
Idea 1:
Make a copy of list
Sort the copy
Find the two smallest values (index 0,1)
Find the index of these values**
Idea 2:
Initialize two smallest values to 0,1
Then, iterate through list
and remember the smallest two values
and their index
Idea 3:
Make a list of value, index
Sort the list
Return the index for the first two
Idea 4: (implement this yourself)
Make a copy of list
find min in copy
find index of min in copy
remove min from list copy
find next min in copy
find index of min in the next copy
"""
import random
import time
def smallest_two1(L):
    """ Return the indices of the two smallest values of list L,
        the index of the smallest value first. L needs at least
        two items.
        Assume n items in List L.
        Complexity: O(nlogn + 3n) =most costly element= O(nlogn)
    """
    ordered = sorted(L)                      ### O(n) copy + O(nlogn) sort
    lowest, second = ordered[0], ordered[1]  ### O(1)
    first_at = L.index(lowest)               ### O(n) in the worst case
    second_at = L.index(second)              ### O(n) in the worst case
    if second_at == first_at:
        ## both values are equal: look for the next occurrence
        second_at = L.index(second, first_at+1)
    return first_at, second_at
def smallest_two2(L):
    """ Return the indices of the two smallest values of list L,
        the index of the smallest value first. Of equal values
        the one that comes first in the list is preferred.
        L needs at least two items.
        Assume n items in List L.
        Complexity: O(n)
    """
    ## <= (not <) so that two equal values at the start are returned
    ## in list order
    if L[0] <= L[1]:
        i1, i2 = 0,1
    else:
        i1, i2 = 1,0
    for i in range(2,len(L)):  ### O(n)
        if L[i] < L[i1]:
            ## new smallest; the old smallest becomes the second
            i1, i2 = i, i1
        elif L[i] < L[i2]:
            i2 = i
    return i1, i2
def smallest_two3(L):
    """ Return the indices of the two smallest values of list L,
        the index of the smallest value first. L needs at least
        two items.
        Assume n items in List L.
        Complexity: O(n+nlog n) =most costly element= O(nlogn)
        Note: Compared to smallest_two1, we are sorting a more complex
        list (list of 2-tuples). So efficiency will depend on the
        implementation of that sort
    """
    ## O(n) to build the (value, index) pairs, O(nlogn) to sort them
    ranked = sorted([ (val, i) for (i, val) in enumerate(L) ])
    return ranked[0][1], ranked[1][1]
if __name__ == "__main__":
    print("Test cases")
    L = range(1000)
    random.shuffle(L)
    ## time every implementation on the same shuffled list
    timed = [ (smallest_two1, "smallest_two1 took %f seconds"),
              (smallest_two2, "smallest_two2 took %f seconds"),
              (smallest_two3, "smallest_two3 took %f seconds") ]
    for func, report in timed:
        start = time.time()
        a,b = func(L)
        end = time.time()
        print(report %(end-start))
"""
Nose test cases
Normal test cases:
[1,2,3,4] ** smallest values at the beginning
[3,2,1,4] ** smallest values in the middle
[4,3,2,1] ** smallest values at the end
[1,4,3] ** odd length list
[4,2,1,1,5] ** duplicate values
Edge cases (unusual cases, decide on what should be returned first!)
Small lists: [] , [1], [2,1]
Lists with things other than numbers: ['a','b'] **note this should work fine
as long as there is a comparison between values
"""
import nose
from smallest_two import *
## tests for the first function
def test_st1_1():
    """ smallest values at the beginning """
    found = smallest_two1([1,2,3,4])
    assert found == (0,1)

def test_st1_2():
    """ smallest values in the middle """
    found = smallest_two1([3,2,1,4])
    assert found == (2,1)

def test_st1_3():
    """ smallest values at the end """
    found = smallest_two1([4,3,2,1])
    assert found == (3,2)

def test_st1_4():
    """ odd length list """
    found = smallest_two1([1,4,3])
    assert found == (0,2)

def test_st1_5():
    """ duplicate values """
    found = smallest_two1([4,2,1,1,5])
    assert found == (2,3)

## tests for the second function
def test_st2_1():
    """ smallest values at the beginning """
    found = smallest_two2([1,2,3,4])
    assert found == (0,1)

def test_st2_2():
    """ smallest values in the middle """
    found = smallest_two2([3,2,1,4])
    assert found == (2,1)

def test_st2_3():
    """ smallest values at the end """
    found = smallest_two2([4,3,2,1])
    assert found == (3,2)

def test_st2_4():
    """ odd length list """
    found = smallest_two2([1,4,3])
    assert found == (0,2)

def test_st2_5():
    """ duplicate values """
    found = smallest_two2([4,2,1,1,5])
    assert found == (2,3)

## tests for the third function
def test_st3_1():
    """ smallest values at the beginning """
    found = smallest_two3([1,2,3,4])
    assert found == (0,1)

def test_st3_2():
    """ smallest values in the middle """
    found = smallest_two3([3,2,1,4])
    assert found == (2,1)

def test_st3_3():
    """ smallest values at the end """
    found = smallest_two3([4,3,2,1])
    assert found == (3,2)

def test_st3_4():
    """ odd length list """
    found = smallest_two3([1,4,3])
    assert found == (0,2)

def test_st3_5():
    """ duplicate values """
    found = smallest_two3([4,2,1,1,5])
    assert found == (2,3)

if __name__ == "__main__":
    ## run all the test functions of this module
    nose.runmodule(exit=False)
"""
Input: a sorted list and a value
Find the index of value if value is in list
or, if it is not, return the index of where
value would be inserted to keep the list sorted.
"""
import random
import time
def search(L,val):
    """ Linear search: look at the items of the sorted list L one by
        one and return the index of the first item that is not
        smaller than val, or len(L) if there is no such item.
    """
    for position, item in enumerate(L):  ## O(n)
        if item >= val:
            return position
    return len(L)
def binsearch(L, val):
    """ Binary search: always look at the middle value
        of a list, then look at the middle value of the remaining
        list. You can do this at most O(log n) times, where
        log is base 2.

        Returns the index of the first item of the sorted list L
        that is not smaller than val. If every item is smaller
        than val (or L is empty) it returns len(L), the index
        where val would be inserted.
    """
    low = 0
    ## high starts one past the last index so that len(L) is a
    ## possible answer
    high = len(L)
    while low < high:
        mid = (low+high)//2  ## always smaller than high, so a valid index
        if L[mid] < val:
            low = mid+1
        else:
            high = mid
    return low
if __name__ == "__main__":
    def show(*parts):
        """ Print the parts separated by single spaces. """
        print(" ".join([str(part) for part in parts]))

    print("Time tests")
    k = 500000
    L = range(k)
    ## search for a value that is larger than every item of the list
    for label, method in [ ("Linear search took", search),
                           ("Binary search took", binsearch) ]:
        start = time.time()
        a = method(L, k+1)
        end = time.time()
        show(label, end-start, "seconds")
Lecture 21¶
Module: lec21_examples
— Example programs from Lecture 21¶
Code:
""" Sorting
Naive: O(n^2) => Insertion sort
Best: O(n logn) => Merge sort
**
Internal sort: O(n logn)
** but much faster as it is written in C and
compiled, instead of the interpreted version.
"""
import time
import random
def time_function(L, func):
    """ Illustrates how you can send a function as an argument
        to another function as well. Runs func on a copy of L
        (so L itself is not changed) and prints the name of the
        function and the time the call took. Returns nothing.
    """
    working_copy = list(L)
    began = time.time()
    func(working_copy)
    elapsed = time.time() - began
    ## pad the name so that the printed times line up
    label = (func.__name__).ljust(20)
    print("Method: %s took %f seconds" %(label, elapsed))
def ins_sort(L):
    """ Insertion sort: sorts the list L in place, returns nothing.
        Total complexity:
            1 + 2 + ... + n-1 = n(n-1)/2
        Hence, this algorithm is O(n^2)
    """
    size = len(L)
    i = 1
    while i < size:
        ### i ranges between 1 and n-1; items before i are sorted
        val = L[i]
        pos = i
        ## move the larger items one step right (at most i times)
        while pos > 0 and L[pos-1] > val:
            L[pos] = L[pos-1]
            pos -= 1
        L[pos] = val
        i += 1
def merge(L1, L2):
    """ Assume L1 and L2 are sorted.
        Return a new list that is the merged version of L1&L2.
        This is the efficient version of merge: it walks through
        both lists with an index and does not modify the input lists.
    """
    merged = []
    a, b = 0, 0
    size1, size2 = len(L1), len(L2)
    while a < size1 and b < size2:
        if not L1[a] < L2[b]:
            ## the value from L2 goes first (also when both are equal)
            merged.append( L2[b] )
            b += 1
        else:
            merged.append( L1[a] )
            a += 1
    ## at this point, either L1 or L2 has run out of values
    ## add all the remaining values to the end of the result.
    merged.extend(L1[a:])
    merged.extend(L2[b:])
    return merged
def merge_with_pop(L1, L2):
    """ Assume L1 and L2 are sorted.
        Return a new list that is the merged version of L1&L2.
        This is the easy version of merge, using pop: the values
        are removed from the front of the input lists while
        merging, so L1 and L2 are changed by the call.
    """
    merged = []
    while L1 and L2:
        ## take from the list with the smaller first value
        ## (from L2 when both are equal)
        source = L1 if L1[0] < L2[0] else L2
        merged.append( source.pop(0) )
    ## one of the lists is empty now, copy what is left of the other
    merged.extend(L1)
    merged.extend(L2)
    return merged
def merge_sort(L):
    """ Return a new sorted list with the values of L; L itself is
        not changed. An empty L gives an empty list.

        Complexity: Every step of while loop:
            we merge lists which together would make up the
            input list. So, there are about O(n) comparisons
            and list insertions.
            For example, in the beginning, we have n lists,
            so we merge n/2 times lists of 1 items, each requiring
            about 2 insertions. Total cost 2*n/2=n.
        How many times the while loop is executed?
            We start with n lists
            At the next step, we have n/2
            We will keep halving, which we can do at most log n
            times (log base 2 of n).
        Total cost: O(n* log n)
    """
    ## start with one sorted list of a single item per value
    runs = [ [val] for val in L ]
    if not runs:
        return []  ## nothing to merge, and runs[0] would fail below
    while len(runs) > 1:
        ## merge the lists two by two
        merged_runs = [ merge( runs[i], runs[i+1] )
                        for i in range(0, len(runs)-1, 2) ]
        if len(runs)%2 == 1:
            ## odd number of lists: the last one has no partner
            merged_runs.append( runs[-1] )
        runs = merged_runs
    return runs[0]
def merge_sort_with_pop(L):
    """ Return a new sorted list with the values of L; L itself is
        not changed. An empty L gives an empty list.

        This is the version that uses the less efficient
        version of merge. See the time comparisons by running
        this program.
        The number of merge rounds is the same as in merge_sort,
        but it has higher cost because of inefficient use of lists.
    """
    ## start with one sorted list of a single item per value
    runs = [ [val] for val in L ]
    if not runs:
        return []  ## nothing to merge, and runs[0] would fail below
    while len(runs) > 1:
        ## merge the lists two by two
        merged_runs = [ merge_with_pop( runs[i], runs[i+1] )
                        for i in range(0, len(runs)-1, 2) ]
        if len(runs)%2 == 1:
            ## odd number of lists: the last one has no partner
            merged_runs.append( runs[-1] )
        runs = merged_runs
    return runs[0]
if __name__ == "__main__":
    k = 10000
    L = range(k)
    random.shuffle(L)
    ## every method is timed on its own copy of the same shuffled list
    for sorter in (ins_sort, merge_sort, merge_sort_with_pop, list.sort):
        time_function( L, sorter )
Lecture 22¶
Module: lec22_examples
— Example programs from Lecture 22¶
Code:
"""
Simple Tkinter program to show creation of an interface.
This is not the preferred method for creating a user interface
as it will be difficult to attach functions to the buttons.
"""
from Tkinter import *
## Create the top level window of the program
root = Tk()
## main_frame is placed in the window and holds the two frames below
main_frame = Frame(root)
main_frame.pack()
## top_frame (for the canvas) goes to the top of main_frame
top_frame = Frame(main_frame)
top_frame.pack(side=TOP)
## bottom_frame (for the buttons) goes to the bottom of main_frame
bottom_frame = Frame(main_frame)
bottom_frame.pack(side=BOTTOM)
## a 400 by 400 canvas with one oval drawn inside the box
## from (100,100) to (300,300)
canvas = Canvas(top_frame, height=400, width=400)
canvas.pack()
canvas.create_oval(100,100,300,300)
## two buttons with no command attached: pressing them does nothing
button3 = Button(bottom_frame, text="Bottom Left")
button3.pack(side=LEFT)
button4 = Button(bottom_frame, text="Bottom Right")
button4.pack(side=RIGHT)
## start the event loop; the print below is reached only after
## mainloop returns (presumably when the window is closed)
root.mainloop()
print "Hello"
"""
Sample TkInter program: Illustrates the use of a class to
encapsulate all interface elements, use of buttons, canvases
and frames.
"""
from Tkinter import *
class MyApp(object):
    """ Window with a canvas on top and a row of buttons below it
        to draw circles, clear the canvas, change the number of
        circles and quit.
    """

    def __init__(self, parent):
        """ Build all interface elements inside the window parent. """
        ## interface elements
        self.parent = parent
        self.main_frame = Frame(parent)
        self.main_frame.pack()
        self.top_frame = Frame(self.main_frame)
        self.top_frame.pack(side=TOP)
        self.bottom_frame = Frame(self.main_frame)
        self.bottom_frame.pack(side=BOTTOM)
        self.canvas = Canvas(self.top_frame, height=500, width=500)
        self.canvas.pack()
        ## the buttons are created and packed in this order
        self.drawbutton = self._add_button("Draw", self.draw, LEFT)
        self.clearbutton = self._add_button("Clear", self.clear, LEFT)
        self.increasebutton = self._add_button("Increase", self.increase, LEFT)
        self.decreasebutton = self._add_button("Decrease", self.decrease, LEFT)
        self.quitbutton = self._add_button("Quit", self.terminate, RIGHT)
        ## other class attributes
        self.drawingfinished = True
        self.numcircles = 4

    def _add_button(self, label, action, side):
        """ Create a button in the bottom frame that calls action,
            pack it on the given side and return it.
        """
        button = Button(self.bottom_frame, text=label, command=action)
        button.pack(side=side)
        return button

    def increase(self):
        """ Double the number of circles to draw. """
        self.numcircles = self.numcircles * 2

    def decrease(self):
        """ Halve the number of circles, but never go below 4. """
        halved = self.numcircles // 2
        self.numcircles = max(halved, 4)

    def clear(self):
        """ Remove everything from the canvas. """
        self.canvas.delete("all")

    def draw(self):
        """ Draw numcircles circles around the point (250, 250),
            one at a time, each with a larger radius than the
            one before.
        """
        self.drawingfinished = False
        x,y = 250, 250
        radius = 200 // self.numcircles
        for step in range(1, self.numcircles + 1):
            rad = radius*step
            self.canvas.create_oval(x-rad, y-rad, x+rad, y+rad)
            ## show the new circle, then wait before the next one
            self.canvas.update()
            self.canvas.after(100)
        self.drawingfinished = True

    def terminate(self):
        """ Close the window, unless a drawing is still in progress. """
        if not self.drawingfinished:
            return
        self.parent.destroy()
if __name__ == "__main__":
    ## create the window, fill it with the interface elements and
    ## hand control to the event loop
    root = Tk()
    myapp = MyApp(root)
    root.mainloop()
    ## reached after the event loop has ended
    print("Finished the program")
Lecture 23¶
Module: lec23_examples
— Example programs from Lecture 23¶
Code:
"""
Recursion: Example functions
Basis/Stopping condition:
Define when your function should stop
calling itself recursively
Inductive/Recursive step:
Define how the function can compute its
output by calling itself recursively and then
use the result.
Example:
Recursive step:
factorial(n) = factorial(n-1)*n
If you had the correct output for factorial(n-1),
you can multiply it with n to find the correct output.
"""
def blast(n):
    """ Count down from n to 1, print Blast off! and then count
        back up to n again. Every number is printed once before
        and once after the recursive call.
    """
    if n <= 0:  ##basis step
        print("Blast off!")
        return
    ## recursive step
    print(n)
    blast(n-1)
    print(n)
def factorial(n):
    """ Return n! = 1*2*...*n for an integer n >= 0, computed
        recursively. factorial(0) is 1.
        Raises ValueError for a negative n.
    """
    if n < 0:
        ## without this check the recursion would never reach the basis
        raise ValueError("factorial is not defined for negative values")
    if n <= 1:  ##basis step, covers both 0! and 1!
        return 1
    else:       ## recursive step: n! = (n-1)! * n
        x = factorial(n-1)
        return x*n
def factorial_iterative(n):
    """ Many recursive functions can be written without recursion.
        Return n! = 1*2*...*n using a loop; the result is 1 when
        n is 0 or 1.
    """
    x = 1
    ## start at 2: starting at 0 would turn the whole product into 0
    for i in range(2, n+1):
        x *= i
    return x
def fib(n):
    """ Return the n-th Fibonacci number, where fib(0) is 0 and
        fib(1) is 1. Even though the sequence is defined
        recursively, this function would be much more efficient
        to write without recursion.
    """
    if n != 0 and n != 1:
        ## recursive step: sum of the two previous values
        return fib(n-1) + fib(n-2)
    ## basis step
    if n == 0:
        return 0
    return 1
if __name__ == "__main__":
    ##Testing code here.
    val = int(raw_input("Number => "))
    print("Factorial of %d is %d" %(val, factorial(val)))
    ## the first ten values of the Fibonacci sequence
    i = 0
    while i < 10:
        print("fib(%d) = %d" %(i, fib(i)))
        i += 1
Lecture 24¶
Module: lec24_examples
— Example programs from Lecture 24¶
Code:
"""
Problem: Find the mode of a list: the value that occurs most frequently
Refine requirements:
1. How do I break ties or should I return all values that occur most
frequently?
2. What should I return for empty list?
3. What are the different values and is there a limited number of them?
Expected number of distinct values in the list: m
Is m constant or can it vary?
Do we expect m to be as large as n (#items in the list)
4. What are the values? Integer or float?
Which values are considered the same?
--------
Version 1.
Find mode, given:
A list of integers, (m is not fixed/not a constant)
I am looking for any value that occurs most frequently
(break ties arbitrarily)
Return None if the list is empty
Possible solutions:
Note: For complexity, we care about the most costly/complex
step.
List solution: O(n logn)
Create a copy of the list: O(n)
Sort a copy of the list: O(n logn)
Go through the list once, and keep track of
#times an item is seen and the most frequent item
O(n)
Set solution: O(n * m)
Find the set(L): O(n) => m distinct values
For each distinct value ( set(L)): [repeat loop m times: O(n*m)]
Count how many times the value occurs in L
and keep track of the max O(n)
Dictionary solution: O(n)
Use a dictionary D:
key: a value in list
value: count of that value
For each item in list: O(n)
update counts in D
For each key in the dictionary: O(m)
check the count and keep
track of the max
"""
import time
import random
def time_algorithm(L, func):
    """ Call func(L) once and print the name of func together with
        the number of seconds the call took. The result of func
        is not used. Returns nothing.
    """
    began = time.time()
    func(L)
    elapsed = time.time() - began
    print("%s took: %.4f seconds" %(func.__name__, elapsed))
def mode_list(L):
    """ List solution: return a value that occurs most frequently
        in L, or None if L is empty. L is not changed. When several
        values are equally frequent, the first of them in sorted
        order is returned.
    """
    if len(L) == 0:
        return None
    ordered = L[:]   ##make a copy of the list to work with
    ordered.sort()   ##equal values are next to each other now
    best_val, best_count = None, 0      ##most frequent value so far
    run_val, run_count = ordered[0], 0  ##the value currently counted
    for item in ordered:
        if item == run_val:
            run_count += 1
            continue
        ##we have seen a new value: close the count of the old one
        if run_count > best_count:
            best_val, best_count = run_val, run_count
        run_val, run_count = item, 1
    ##the last value has not been compared to the best one yet
    if run_count > best_count:
        best_val = run_val
    return best_val
def mode_set(L):
    """ Set solution: return a value that occurs most frequently
        in L, or None if L is empty. Every distinct value is
        counted with L.count.
    """
    distinct = set(L)
    if not distinct:
        return None
    ## max returns the first value with the highest count
    return max(distinct, key=L.count)
def mode_dict(L):
    """ Dictionary solution: return a value that occurs most
        frequently in L, or None if L is empty.
    """
    counts = {}   ## key: a value in L, value: count of that value
    for item in L:
        counts[item] = counts.get(item, 0) + 1
    if not counts:
        return None
    ## max returns the first key with the highest count
    return max(counts, key=counts.get)
if __name__ == "__main__":
    ## test code!, try [], [1,2,2,3], [3,3,2,1]
    ## timing of the algorithm
    ## We expect dictionary to be always cheapest
    ## Set solution is fast when m is very small, otherwise
    ## Set solution is similar to n^2 and the most expensive one.

    ## Create a random list of n values between 1 and m
    n = 4000
    m = n//10   ##Try also n=10
    x = [ random.randint(1,m) for i in range(n) ]

    ## Test all the algorithms on the same list
    print("N: %d, M: %d" %(n,m))
    print("")
    for algorithm in (mode_list, mode_set, mode_dict):
        time_algorithm(x, algorithm)