Si existe un ser humano que está haciéndole seguimiento al proyecto SnowRSS, aquí puede ver los cambios realizados desde anoche.
– Agregamos una tabla para almacenar información relacionada con los enclosures de los posts RSS. Es decir, soportaremos videocasts y podcasts.
– Acomodamos de manera radical (refactoring) la forma en que hacíamos las conexiones a la BD. Ahora todas las conexiones son simples instancias
de MySQLdb.cursor, como variables que se crean en la pila de cada método, se usan, se desconectan y listo. El servidor estaba teniendo problemas con
muchas conexiones abiertas; ahora, conexión que no usamos, conexión que desechamos. Este approach nos ha servido muy bien con nuestra BD de lyrics en PHP
y el overhead de conexión y desconexión parece ser mínimo, ya que el servidor es local.
– Muchos otros fixes, limpieza y una mayor búsqueda del uso de OO en el código.
Index: sql/snowrss.sql
===================================================================
--- sql/snowrss.sql (revision 20684)
+++ sql/snowrss.sql (revision 20690)
@@ -53,3 +53,15 @@
`BL_fk_lang_code` text,
PRIMARY KEY (`BL_fk_blog_id`,`BL_fk_lang_code` (2))
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
+
+DROP TABLE IF EXISTS `ENCLOSURES`;
+CREATE TABLE `ENCLOSURES` (
+ `Enc_pk_id` int(11) NOT NULL auto_increment,
+ `Enc_fk_post_id` int(11) NOT NULL,
+ `Enc_url` text NOT NULL,
+ `Enc_creation_timestamp` double default 0,
+ `Enc_update_timestamp` double default 0,
+ `Enc_length` double default 0,
+ `Enc_type` text,
+ PRIMARY KEY (`Enc_pk_id`)
+) ENGINE=MyISAM DEFAULT CHARSET=utf8;
Index: snowrss.py
===================================================================
--- snowrss.py (revision 20684)
+++ snowrss.py (revision 20690)
@@ -1,11 +1,12 @@
import MySQLdb
from MySQLdb import MySQLError
-from snowrss_config import getDBCon
+from snowrss_config import *
import feedparser,sys,os,time
from threading import Thread
class Post:
'''Receives a dictionary with
+ [id]:
blog_id:
post_title:
post_author:
@@ -14,43 +15,58 @@
post_timestamp:
post_tags:[]
'''
- def __init__(self,props,cursor):
+ def __init__(self,props):
self.properties = props
- self.cursor = cursor
def setBlogId(self,id=None):
self.blog_id = id
- def __reconnect(self):
- """Reconnects to the database"""
- self.con.ping() # = getDBCon()
- #self.cursor = self.con.cursor(MySQLdb.cursors.DictCursor)
-
def save(self):
sql = 'insert into BLOG_POSTS '
sql += '(BP_fk_blog_id,BP_title,BP_author,BP_link,BP_summary,BP_timestamp) '
sql += "values (%d,'%s','%s','%s','%s',%d);"
sql = sql % (self.properties['blog_id'],
- self.properties['post_title'] .replace("'",""").encode('utf8'),
- self.properties['post_author'].replace("'",""").encode('utf8'),
- self.properties['post_link'].replace("'",""").encode('utf8'),
- self.properties['post_summary'].replace("'",""").encode('utf8'),
+ self.properties['post_title'] .replace("'","\'").encode('utf8'),
+ self.properties['post_author'].replace("'","\'").encode('utf8'),
+ self.properties['post_link'].replace("'","\'").encode('utf8'),
+ self.properties['post_summary'].replace("'","\'").encode('utf8'),
self.properties['post_timestamp'])
try:
- self.cursor.execute(sql)
+ cursor = getDbCursor()
+ cursor.execute(sql)
+ cursor.connection.close()
except Exception, e:
#MySQL has gone away
- if (e[0] == 2006):
- self._reconnect()
- self.save()
- print 'Post.save(): ' + sql
- print e
- print "RECONNECTING"
- print
+ print 'Post.save(): ' + sql
+ print e
- def wasUpdated(ignoreme,cursor,post_link,post_timestamp):
+ self.setIdFromTitle()
+
+ def setIdFromTitle(self):
+ #we fetch the id of the post.
+ sql = "SELECT BP_pk_id FROM BLOG_POSTS WHERE BP_link = '%s'"
+ sql = sql % self.properties['post_link']
+
+ cursor = None
+ try:
+ cursor = getDbCursor()
+ cursor.execute(sql)
+ cursor.connection.close()
+ except Exception, e:
+ print
+ print "Post.setIdFromTitle(): " + sql
+ print e
+ print
+
+ data = cursor.fetchone()
+ self.properties['id'] = int(data['BP_pk_id'])
+
+ def getId(self):
+ return int(self.properties['id'])
+
+ def wasUpdated(ignoreme,post_link,post_timestamp):
'''Returns true if the post identified by the post_link has an older timestamp than the given one.'''
sql='select BP_timestamp 1:
#If there are many repetitions, delete all but one.
appearances = int(data['posted_before'])
@@ -120,8 +144,9 @@
print detail
try:
- cursor.connection.ping()
+ cursor = getDbCursor()
cursor.execute(sql)
+ cursor.connection.close()
except MySQLdb.Error, e:
print "Post.exists() Error cleaning posts [%s] - %d: %s" % (post_link,
e.args[0],
@@ -130,18 +155,25 @@
sys.exit()
return True
-
if int(data['posted_before']) >= 1:
return True
return False
exists = classmethod(exists)
- def load(self,id):
- '''Loads all the post data given the post id'''
+ def hasEnclosure(self):
+ """Tells if there's an enclosure on the database related to this post"""
+ #TODO
+ return True
+
+ def getEnclosure(self):
+ """Returns an enclosure if this post has one."""
+ #TODO
+ if self.hasEnclosure():
+ return True
class Blog:
- def __init__(self,con,props=None):
+ def __init__(self,props=None):
'''Receives a dictionary
id:
blog_name:
@@ -158,32 +190,13 @@
if self.props == None:
self.props = {}
-
- self.setConnection(con)
- self.setRunsAsThread(False)
- def setConnection(self, con):
- self.con = con
- self.setCursor(self.con.cursor(MySQLdb.cursors.DictCursor))
+ def getId(self):
+ return self.props['id']
- def setRunsAsThread(self, isThread=True):
- '''To determine if this Blog will fetch its posts on a separate thread or not'''
- self.props['is_thread'] = isThread
-
- def runsAsThread(self):
- return self.props['is_thread']
-
- def setCursor(self, cursor):
- self.cursor = cursor
-
def __str__(self):
return str(self.props['id']) + ' - ' + self.props['blog_name']
- def __reconnect(self):
- """Reconnects to the database"""
- self.con = getDBCon()
- self.cursor = self.con.cursor(MySQLdb.cursors.DictCursor)
-
def getUrl(self):
return self.props['blog_url']
@@ -199,14 +212,14 @@
sql = sql % (self.props['blog_url'], int(self.props['id']))
try:
- self.cursor.execute(sql)
+ cursor = getDbCursor()
+ cursor.execute(sql)
+ cursor.connection.close()
except Exception, e:
print
+ print 'Blog.updateUrl(): ' + sql
print e
- print 'Blog.updateUrl(): ' + sql
- print "RECONNECTING"
- self.__reconnect()
- self.updateUrl(url)
+ print
def getRssUrl(self):
return self.props['blog_rss_url']
@@ -223,14 +236,15 @@
sql = sql % (self.props['blog_rss_url'], int(self.props['id']))
try:
- self.cursor.execute(sql)
+ cursor = getDbCursor()
+ cursor.execute(sql)
+ cursor.connection.close()
except Exception, e:
print
+ print 'Blog.updateRssUrl(): ' + sql
print e
- print 'Blog.updateRssUrl(): ' + sql
- print "RECONNECTING"
- self.__reconnect()
- self.updateRssUrl(rssUrl)
+ print
+
def hasLanguageCode(self,lang_code,refresh=False):
@@ -263,10 +277,14 @@
sql = "INSERT INTO BLOG_LANGUAGES VALUES (%d,'%s');" % (self.props['id'],
lang_code)
try:
- self.cursor.execute(sql)
+ cursor = getDbCursor()
+ cursor.execute(sql)
+ cursor.connection.close()
except MySQLdb.Error, e:
+ print
print "Blog.addLanguageCodes() Error: %d: %s" % (e.args[0],e.args[1])
print sql
+ print
sys.exit()
self.getLanguageCodes(True) #Refresh
@@ -279,10 +297,14 @@
sql = "DELETE FROM BLOG_LANGUAGES WHERE BL_fk_blog_id = %d" % (self.props['id'])
try:
- self.cursor.execute(sql)
+ cursor = getDbCursor()
+ cursor.execute(sql)
+ cursor.connection.close()
except MySQLdb.Error, e:
+ print
print "Blog.clearLanguageCodes() Error: %d: %s" % (e.args[0],e.args[1])
print sql
+ print
sys.exit()
return True
@@ -306,10 +328,14 @@
sql = sql % (self.props['id'],lang_code)
try:
- self.cursor.execute(sql)
+ cursor = getDbCursor()
+ cursor.execute(sql)
+ cursor.connection.close()
except MySQLdb.Error, e:
+ print
print "Blog.removeLanguageCodes() Error: %d: %s" % (e.args[0],e.args[1])
print sql
+ print
sys.exit()
#clear and reload current languages from the database
@@ -333,13 +359,17 @@
sql += 'WHERE BL_fk_blog_id = ' + str(self.props['id'])
try:
- self.cursor.execute(sql)
+ cursor = getDbCursor()
+ cursor.execute(sql)
+ cursor.connection.close()
except MySQLdb.Error, e:
+ print
print "Blog.getLanguageCodes() Error: %d: %s" % (e.args[0],e.args[1])
print sql
+ print
sys.exit()
- result_set = self.cursor.fetchall()
+ result_set = cursor.fetchall()
if len(result_set) = max_threads:
-# for t in threads:
-# t.join()
-# threads.remove(threads[0])
-# if len(threads) > ((max_threads/2) + 1):
- #print "We can continue without waiting now"
-# break
+ if parallel_fetching==False:
+ # LINEAL FETCHING
+ blog.fetchPosts()
+ print blog.getName()
+ else:
+ #PARALLEL FETCHING
+ if (len(threads) = max_threads:
+ for t in threads:
+ t.join()
+ threads.remove(threads[0])
+
+ if len(threads) > ((max_threads/2) + 1):
+ #print "We can continue without waiting now"
+ break
Index: util/check_who_applied.py
===================================================================
--- util/check_who_applied.py (revision 20684)
+++ util/check_who_applied.py (revision 20690)
@@ -8,10 +8,8 @@
print "Could not import snowrss_config"
sys.exit()
-con = snowrss_config.getDBCon()
+cursor = getDbCursor()
-cursor = con.cursor()
-
sql = "SELECT * FROM BLOGS WHERE Blog_active != 1;"
cursor.execute(sql)
@@ -26,9 +24,7 @@
print "Status: %s" % b['Blog_active']
print "Podcast: %s" % b['Blog_is_podcast']
- blog = Blog(con)
+ blog = Blog()
blog.load(int(b['Blog_pk_id']))
print "Languages: %s" % str(blog.getLanguageCodes())
print
-
-con.close()
Index: util/fetch_enclosures.py
===================================================================
--- util/fetch_enclosures.py (revision 0)
+++ util/fetch_enclosures.py (revision 20690)
@@ -0,0 +1,23 @@
+import sys
+sys.path.append('/home/wedoit4y/www/news4you/SnowRSS/')
+
+try:
+ import snowrss_config
+ from snowrss import *
+except Exception,e:
+ print "Could not import snowrss_config [%s]" % e
+ sys.exit()
+
+blogs = Blog.getBlogs()
+podcasts = []
+
+for blog in blogs:
+ if blog.isPodcast():
+ podcasts.append(blog)
+
+print "Let's work with %d podcasts" % len(podcasts)
+
+for podcast in podcasts:
+ print "Scanning %s" % podcast.getName()
+
+ podcast.updateEnclosures()
Index: util/check_stats.py
===================================================================
--- util/check_stats.py (revision 20684)
+++ util/check_stats.py (revision 20690)
@@ -7,18 +7,18 @@
print "Could not import snowrss_config"
sys.exit()
-con = snowrss_config.getDBCon()
+con = snowrss_config.g
cursor = con.cursor()
-sql = "select BP_fk_blog_id, COUNT(BP_fk_blog_id) as post_count, SUM(PH_hits) as hits, BLOGS.* FROM BLOG_POSTS JOIN BLOGS ON BP_fk_blog_id = Blog_pk_id JOIN POST_HITS ON BP_pk_id = PH_fk_post_id WHERE Blog_active=1 GROUP BY BP_fk_blog_id ORDER BY hits DESC;"
+sql = "select BP_fk_blog_id, COUNT(BP_fk_blog_id) as post_count, SUM(PH_hits) as hits, Blog_active, BLOGS.* FROM BLOG_POSTS JOIN BLOGS ON BP_fk_blog_id = Blog_pk_id JOIN POST_HITS ON BP_pk_id = PH_fk_post_id GROUP BY BP_fk_blog_id ORDER BY hits DESC;"
cursor.execute(sql)
rows = cursor.fetchall()
i=1
-print "Place\tHits\tPosts\tBlog"
+print "Place\tHits\tPosts\tBlog\tID\tActive"
for b in rows:
- print "%d\t%s\t%s\t%s" % (i,b['hits'],b['post_count'],b['Blog_name'])
+ print "%d\t%s\t%s\t%s\t%d\t%s" % (i,b['hits'],b['post_count'],b['Blog_name'],b['BP_fk_blog_id'],b['Blog_active'])
i+=1
con.close()
Index: util/blog_is_podcast.py
===================================================================
--- util/blog_is_podcast.py (revision 20684)
+++ util/blog_is_podcast.py (revision 20690)
@@ -20,7 +20,7 @@
print "The ID of the blog needs to be an int"
sys.exit()
-blog = Blog(getDBCon())
+blog = Blog()
blog.load(int(sys.argv[1]))
if blog.isPodcast()==False:
Index: util/activate_blog.py
===================================================================
--- util/activate_blog.py (revision 20684)
+++ util/activate_blog.py (revision 20690)
@@ -8,8 +8,6 @@
print "Could not import snowrss_config [%s]" % e
sys.exit()
-
-
if len(sys.argv)