#203 Sync Twitter messages to Sina Weibo using Google App Engine

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Force the interpreter-wide default string encoding to UTF-8 so that implicit
# str <-> unicode conversions performed later in this module do not fail on
# non-ASCII text.
import sys
default_encoding = 'utf-8'
if sys.getdefaultencoding() != default_encoding:
    # Python 2 removes sys.setdefaultencoding during interpreter start-up;
    # reloading the sys module makes the function available again.
    reload(sys)
    sys.setdefaultencoding(default_encoding)

import re
import urllib,urllib2,Cookie
from google.appengine.api import urlfetch
from google.appengine.ext import db

class Twitter(db.Model):
    """Datastore record holding the id of a Twitter status already forwarded."""

    # The status id is kept as text, exactly as it appears in the feed.
    id = db.StringProperty()

def make_cookie_header(cookie):
    """Render the entries of ``cookie`` as a ``Cookie`` request-header value.

    Every value of ``cookie`` must expose ``key`` and ``value`` attributes.
    Each one is written as ``key=value; `` -- the separator also follows the
    last pair, so a non-empty result always ends with ``"; "``.  A cookie
    with no entries produces an empty string.
    """
    pairs = ["%s=%s; " % (morsel.key, morsel.value)
             for morsel in cookie.values()]
    return "".join(pairs)


def send_sina_msgs(username,password,msgs):
    '''Log in to Sina and publish every entry of msgs as a microblog post.

    username, password -- Sina account credentials used for the SSO login.
    msgs -- a list of messages (not a single string); they are posted in
            list order, one HTTP POST per message.

    Returns None.  The HTTP status of the login and publish responses is not
    inspected; errors raised by urlfetch.fetch propagate to the caller.
    '''
    if not msgs:
        # Nothing to publish, so skip the login round-trip entirely.
        return

    def utf8(value):
        # urlencode() calls str() on each value; encoding unicode explicitly
        # keeps non-ASCII text independent of the process default encoding.
        if isinstance(value, unicode):
            return value.encode("utf-8")
        return value

    # URL-encode the credentials so characters such as '&', '#', '+' or
    # spaces cannot corrupt the query string.  A list of pairs keeps the
    # parameter order of the original URL.
    # NOTE(review): the password still travels in the query string of a GET
    # request, as before.
    login_query = urllib.urlencode([
        ("username", utf8(username)),
        ("password", utf8(password)),
        ("returntype", "TEXT"),
    ])
    result = urlfetch.fetch(
        url="https://login.sina.com.cn/sso/login.php?" + login_query)

    # The session cookies returned by the login are replayed on every post;
    # the header is identical for all messages, so build it once.
    cookie = Cookie.SimpleCookie(result.headers.get('set-cookie', ''))
    headers = {
        'Referer': 'http://t.sina.com.cn',
        'Cookie': make_cookie_header(cookie),
    }

    for msg in msgs:
        form_data = urllib.urlencode({"content": utf8(msg)})
        urlfetch.fetch(url="http://t.sina.com.cn/mblog/publish.php",
                       payload=form_data,
                       method=urlfetch.POST,
                       headers=headers)
        
    
def unescape(text):
    """Replace HTML/XML character references and entities in ``text``.

    Numeric references (``&#65;``, ``&#x41;``) and named entities
    (``&amp;``, ``&quot;``, ...) are converted to the characters they stand
    for.  Anything that cannot be resolved -- an unknown entity name, a
    malformed number or an out-of-range code point -- is left unchanged.

    Returns the converted string.

    Based on Fredrik Lundh's recipe:
    http://effbot.org/zone/re-sub.htm#unescape-html
    """
    # The entity table is imported here because the module never imports
    # htmlentitydefs at file level; without it every named entity other than
    # amp/gt/lt would fail with NameError.
    try:
        from htmlentitydefs import name2codepoint  # Python 2
    except ImportError:
        from html.entities import name2codepoint  # Python 3
    try:
        to_char = unichr  # Python 2: produce a unicode character
    except NameError:
        to_char = chr

    # These three are returned as plain string literals (not via to_char) so
    # a byte-string input containing only them stays a byte string.
    basic = {"amp": "&", "gt": ">", "lt": "<"}

    def fixup(match):
        token = match.group(0)
        try:
            if token[:3] == "&#x":
                # hexadecimal character reference
                return to_char(int(token[3:-1], 16))
            if token[:2] == "&#":
                # decimal character reference
                return to_char(int(token[2:-1]))
            # named entity
            name = token[1:-1]
            if name in basic:
                return basic[name]
            return to_char(name2codepoint[name])
        except (ValueError, OverflowError, KeyError):
            # Unresolvable reference: leave the original text as is.
            return token

    return re.sub(r"&#?\w+;", fixup, text)

def parseTwitter(twitter_id,sina_id,sina_sn,since_id=""):
    """Fetch one page of a Twitter user's timeline and repost it to Sina.

    The user's XML timeline is downloaded (restricted to statuses newer than
    ``since_id`` when one is given).  Statuses whose text starts with ``@``
    are skipped; the rest are unescaped and sent oldest-first through
    send_sina_msgs().  When sending succeeds, the id of the newest forwarded
    status is stored as a ``Twitter`` entity.

    Only a single page of the timeline is requested (per the original
    author's note, at most 20 messages).

    twitter_id -- Twitter user whose timeline is read.
    sina_id, sina_sn -- Sina login name and password passed to
                        send_sina_msgs().
    since_id -- id of the last status already forwarded; "" fetches the
                default page.

    Returns None.  A non-200 Twitter response prints "get twitter data
    error"; a failure while sending or storing prints "send sina messages
    error" (the traceback goes to the log).
    """
    import logging

    url = "http://twitter.com/statuses/user_timeline/%s.xml" % (twitter_id)
    if since_id:
        url += "?since_id=%s" % (since_id)

    result = urlfetch.fetch(url)
    if result.status_code != 200:
        print("get twitter data error")
        return

    # Each match is an (id, text) pair; the negative lookahead drops
    # statuses that begin with '@'.
    statuses = re.findall(
        r"(?i)<id>([^<]+)</id>\s*<text>(?!@)([^<]+)</text>", result.content)
    if not statuses:
        # Nothing new to forward.  Without this guard there is no status id
        # to record, and the run would end in a spurious "send sina messages
        # error" after a pointless Sina login.
        return

    # Messages are sent in reverse feed order, so the first match is the one
    # sent last; its id is what gets recorded (feed presumably newest-first).
    newest_id = statuses[0][0]
    messages = [unescape(text) for _, text in reversed(statuses)]

    try:
        send_sina_msgs(sina_id, sina_sn, messages)
        record = Twitter()
        record.id = newest_id
        record.put()
    except Exception:
        # Keep the best-effort behaviour, but record what actually failed.
        logging.exception("forwarding statuses to Sina failed")
        print("send sina messages error")
        
def getLatest():
    """Return the id of the highest-sorting stored Twitter entity.

    Returns "" when no Twitter entity has been stored yet.

    NOTE(review): ``id`` is a StringProperty, so ORDER BY presumably compares
    ids as text; ids of different lengths may not sort numerically -- confirm.
    """
    query = db.GqlQuery("SELECT * FROM Twitter ORDER BY id DESC")
    # get() returns the first result or None, so a single datastore query
    # replaces the former count() followed by an index lookup.
    newest = query.get()
    if newest is None:
        return ""
    return newest.id


# Script body: runs whenever this module is executed.
# A blank line is written before any other output.
# NOTE(review): presumably this ends the CGI response-header section -- confirm.
print ""
# Resume from the newest status id already recorded ("" when none is stored).
latest=getLatest() 

# Modify here: put in your own Twitter user name and Sina login/password.
parseTwitter(twitter_id="haitai",sina_id="hity",sina_sn="******",since_id=latest)