Thursday, June 30, 2011

Animation with matplotlib, bringing the Sierpinski triangle to life

A few posts ago we saw how to plot the Sierpinski triangle. This post is an update that shows how to create an animation where a new point of the fractal is added at each step:
from numpy import *
import pylab

x = [0, 0] # starting point

# the three affine contractions that generate the Sierpinski triangle
A = [ [.5, 0], [0, .5] ]
b1 = [0, 0]
b2 = [.5, 0]
b3 = [.25, sqrt(3)/4]

pylab.ion() # animation on

# note the comma after line: plot returns a list of Line2D objects
# and we want only the first one
line, = pylab.plot(x[0],x[1],'m.',markersize=6)
pylab.axis([0,1,0,1])

data1 = []
data2 = []
iter = 0

while True:
 r = fix(random.rand()*3) # pick one of the three maps at random
 if r == 0:
  x = dot(A,x)+b1
 elif r == 1:
  x = dot(A,x)+b2
 else:
  x = dot(A,x)+b3
 data1.append(x[0]) 
 data2.append(x[1])
 line.set_xdata(data1)  # update the data
 line.set_ydata(data2)
 pylab.draw() # draw the points again
 iter += 1
 print iter
This is the result:
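
The same animation can also be written with the FuncAnimation helper, which runs the redraw loop for us. Here is a minimal sketch, assuming a matplotlib version that ships the matplotlib.animation module (older versions don't have it):

import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from numpy import dot, sqrt, random

A = [ [.5, 0], [0, .5] ]
b = [ [0, 0], [.5, 0], [.25, sqrt(3)/4] ] # the three offsets
x = [0, 0]
data1, data2 = [], []

fig = plt.figure()
line, = plt.plot([], [], 'm.', markersize=6)
plt.axis([0, 1, 0, 1])

def update(frame):
 global x
 x = dot(A, x) + b[random.randint(3)] # apply one of the maps at random
 data1.append(x[0])
 data2.append(x[1])
 line.set_data(data1, data2) # update the data
 return line,

ani = FuncAnimation(fig, update, interval=10)
plt.show()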

Tuesday, June 28, 2011

Searching for an IP address using a regular expression

This snippet finds an IP address in a string using a regular expression. Each octet of the address is matched by three alternatives that cover the ranges 250-255, 200-249 and 0-199:
import re
ip = re.compile(r'((25[0-5]\.)|(2[0-4][0-9]\.)|([01]?[0-9]?[0-9]\.)){3}'
                +r'((25[0-5])|(2[0-4][0-9])|([01]?[0-9]?[0-9]))')

match = ip.search("Your ip address is 192.168.0.1, have fun!")
if match:
 print 'IP address found:',
 print match.group(), # matching substring
 print 'at position',match.span() # indexes of the substring found
else:
 print 'IP address not found'
The output will be:
IP address found: 192.168.0.1 at position (19, 30)
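
The compiled pattern can also extract every address in a longer text. A minimal sketch using finditer, reusing the ip pattern compiled above (the addresses in the example string are made up):

for match in ip.finditer("10.0.0.1 connected, 192.168.0.1 dropped"):
 print match.group() # prints 10.0.0.1, then 192.168.0.1

Note that findall would return tuples of the inner groups rather than the whole match, which is why finditer is used here. Also keep in mind that the pattern has no word boundaries, so it can match inside a longer run of digits.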

Thursday, June 9, 2011

Crawling the web with SGMLParser

In this example we will use SGMLParser to build a simple web crawler that, starting from a given page, follows a randomly chosen link at each step.
import urllib
from random import choice
from sgmllib import SGMLParser

class LinkExplorer(SGMLParser):
 def reset(self):
  SGMLParser.reset(self)
  self.links = [] # list of the urls found in the page

 def start_a(self, attrs):
  """ collect the urls linked from the page """
  for name, value in attrs:
   if name == 'href' and value.startswith('http'):
    self.links.append(value)

def explore(parser,s_url,maxvisit=10,iter=0):
 """ pick a random link in the page s_url
     and follow its links recursively """
 if iter < maxvisit: # it will stop after maxvisit iterations
  print '(',iter,') I am in',s_url
  usock = urllib.urlopen(s_url) # download the page
  parser.reset() # clear the list of links
  parser.feed(usock.read()) # parse the current page
  usock.close()
  if len(parser.links) > 0:
   explore(parser,choice(parser.links),maxvisit,iter+1)
  else: # if the page has no links to follow
   print 'the page has no links'

# test the crawler starting from the Python website
parser = LinkExplorer()
explore(parser,"http://www.python.org/")
Let's go!
( 0 ) I am in http://www.python.org/
( 1 ) I am in http://wiki.python.org/moin/NumericAndScientific
( 2 ) I am in http://numpy.scipy.org/
( 3 ) I am in http://sphinx.pocoo.org/
( 4 ) I am in http://www.bitbucket.org/birkenfeld/sphinx/issues/
( 5 ) I am in http://blog.bitbucket.org
( 6 ) I am in http://haproxy.1wt.eu/
( 7 ) I am in http://www.olivepeak.com/blog/posts/read/free-your-port-80-with-haproxy
( 8 ) I am in http://www.olivepeak.com/peaknotes/
( 9 ) I am in http://notes.olivepeak.com/account/create
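
A note on portability: the sgmllib module is deprecated and has been removed in Python 3. The same parser can be built on HTMLParser from the standard library instead. This is a minimal sketch under that assumption; the explore function above can be reused unchanged:

from HTMLParser import HTMLParser

class LinkExplorer(HTMLParser):
 def reset(self):
  HTMLParser.reset(self)
  self.links = [] # list of the urls found in the page

 def handle_starttag(self, tag, attrs):
  """ collect the urls linked from the page """
  if tag == 'a':
   for name, value in attrs:
    if name == 'href' and value and value.startswith('http'):
     self.links.append(value)

Keep in mind that HTMLParser is stricter than SGMLParser and may raise HTMLParseError on malformed pages, so a real crawler should be ready to handle that.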