23 września 2009

Liferea script

#!/usr/bin/python
import urllib2, string, sys, re

# Markers delimiting the part of a fetched page that gets scraped:
# collection starts on a line containing scrapstart and stops on the
# next line containing scrapend.
scrapstart, scrapend = "<div class='snap_preview'>", "</div>"

# State flag for the feed loop: 1 while the current line is inside an
# <item> element, 0 otherwise.
witemie = 0


# functions

def remove_html_tags(data):
    """Return *data* with every ``<...>`` markup tag removed.

    Only the tags themselves are dropped; the text between them and all
    whitespace (including newlines) is kept unchanged.

    data -- string that may contain HTML/XML tags.
    """
    # [^>]* also matches newlines (unlike .*?), so a tag that is broken
    # across several lines is removed as a whole.
    return re.sub(r'<[^>]*>', '', data)

def wyciagnij(urla):
    """Download the page at *urla* and return its scraped text.

    Lines are collected starting with the one that contains
    ``scrapstart`` and stopping before the next one that contains
    ``scrapend``; a line holding both markers is not collected.
    The collected lines are joined and stripped of markup with
    ``remove_html_tags``.

    urla -- URL passed to ``urllib2.urlopen``.

    Returns the extracted text, or an empty string when the start
    marker never occurs. Errors raised by ``urllib2.urlopen`` propagate
    to the caller.
    """
    response = urllib2.urlopen(urla)
    try:
        htmllines = response.readlines()
    finally:
        # Close explicitly so the connection is released even when
        # reading fails part-way through.
        response.close()

    wtresci = False
    fragmenty = []
    for liniahtm in htmllines:
        if scrapstart in liniahtm:
            wtresci = True
        # Checked after the start marker on purpose: an end marker on
        # the same line switches collection off again.
        if scrapend in liniahtm:
            wtresci = False
        if wtresci:
            fragmenty.append(liniahtm)

    return remove_html_tags("".join(fragmenty))

# Rewrite the XML feed read from stdin onto stdout.
#
# Everything outside <item> elements is copied through unchanged.
# Inside an item only the <item>, <title>, <link> and </item> lines are
# kept; just before each <link> line a <description> is inserted that
# holds the text scraped from the linked page.
from xml.sax.saxutils import escape

for linia in sys.stdin:
    if "<item>" in linia:
        witemie = 1

    if witemie == 0:
        sys.stdout.write(linia)
        continue

    if "<item>" in linia:
        sys.stdout.write(linia)
    if "<title>" in linia:
        sys.stdout.write(linia)
    if "<link>" in linia:
        # Dropping the tags from the <link> line leaves the bare URL.
        adres = remove_html_tags(linia).strip()
        opis = wyciagnij(adres)
        sys.stdout.write("<description>")
        # The scraped text can still contain '&' or '<' (for example
        # HTML entities), which must be escaped to keep the feed
        # well-formed XML.
        sys.stdout.write(escape(opis))
        sys.stdout.write("</description>")
        sys.stdout.write(linia)
    # Tested against the untouched line, so an item that is closed on
    # the same line as its <link> still ends the item state.
    if "</item>" in linia:
        sys.stdout.write(linia)
        witemie = 0


# end

Brak komentarzy:

Prześlij komentarz