/**
 * Convert the URLs, @mentions and #hashtags in a tweet into HTML links.
 *
 * URLs are located first and only the text between them is scanned for
 * mentions and hashtags, so a "#fragment" or "/@user" path segment stays
 * part of its URL instead of being linked separately.
 *
 * @param {string} str Raw tweet text.
 * @returns {string} The text with each URL, mention and hashtag wrapped in
 *     an <a> element; all other text is returned unchanged.
 */
function parseTwit(str)
{
    // The final character class leaves out ".", ":" and "?" so sentence
    // punctuation directly after a URL is not swallowed into the link.
    var urlPattern = /([A-Za-z]+:\/\/[A-Za-z0-9_-]+\.[A-Za-z0-9_:%&~?\/.=#@-]*[A-Za-z0-9_%&~\/=#@-])/;

    // Built by hand because String.prototype.link() is deprecated. None of
    // the patterns can match a double quote, so href needs no escaping.
    function anchor(href, text)
    {
        return '<a href="' + href + '">' + text + '</a>';
    }

    // Link every @mention and #hashtag found in a URL-free piece of text.
    function linkTags(text)
    {
        return text.replace(/(@+|#+)([A-Za-z0-9_]+)/g, function (match, marks, name) {
            var base = marks.charAt(0) === '@'
                ? 'http://twitter.com/'
                : 'http://search.twitter.com/search?q=';
            // Use only the name so a doubled prefix ("@@user", "##tag")
            // does not leak "@" or "#" into the link target.
            return anchor(base + name, match);
        });
    }

    // split() with a capturing group alternates plain text (even indexes)
    // with the matched URLs (odd indexes).
    var pieces = str.split(urlPattern);
    for (var i = 0; i < pieces.length; i++) {
        pieces[i] = (i % 2 === 1) ? anchor(pieces[i], pieces[i]) : linkTags(pieces[i]);
    }
    return pieces.join('');
}
DEMO
Simple regular expressions are used here, but you can also use more robust and advanced regular expressions.
For URL
/[A-Za-z]+:\/\/[A-Za-z0-9-_]+\.[A-Za-z0-9-_:%&~\?\/.=]+/g
For Mention
/[@]+[A-Za-z0-9_]+/g
For Hashtag
/[#]+[A-Za-z0-9_]+/g
The same output can also be obtained from PHP on the server side.
/**
 * Convert the URLs, #hashtags and @mentions in a tweet into HTML links.
 *
 * The text is split on URLs first and only the pieces between them are
 * scanned for hashtags and mentions. Every occurrence is replaced exactly
 * where it was matched, so repeated tags ("#php ... #php") or tags sharing
 * a prefix ("@bob @bobby") can no longer be wrapped twice or cut short.
 *
 * @param string $str Raw tweet text.
 * @return string The text with each URL, hashtag and mention wrapped in an
 *                <a> element; all other text is returned unchanged.
 */
function parseTwit($str)
{
    // The final character class leaves out ".", ":" and "?" so sentence
    // punctuation directly after a URL is not swallowed into the link.
    // "#" and "@" are allowed inside the URL so fragments and "/@user"
    // path segments stay part of it.
    $urlPattern = '!([A-Za-z]+://[A-Za-z0-9_-]+\.[A-Za-z0-9_:%&~?/.=#@-]*[A-Za-z0-9_%&~/=#@-])!';

    // With PREG_SPLIT_DELIM_CAPTURE the result alternates plain text
    // (even indexes) with the matched URLs (odd indexes).
    $pieces = preg_split($urlPattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    $out = '';
    foreach ($pieces as $i => $piece)
    {
        if ($i % 2 === 1)
        {
            //parse URL
            $out .= '<a href="'.$piece.'" >'.$piece.'</a>';
            continue;
        }
        //parse hashtag
        $piece = preg_replace('/#([a-zA-Z0-9_]+)/', '<a href="http://search.twitter.com/search?q=$1" >#$1</a>', $piece);
        //parse mention
        $piece = preg_replace('/@([a-zA-Z0-9_]+)/', '<a href="http://twitter.com/$1" >@$1</a>', $piece);
        $out .= $piece;
    }
    return $out;
}
The PHP function preg_match_all matches the regular expression globally, similar to the /g modifier in JavaScript. All the matches and their replacement values are saved in arrays. After everything has been parsed, the matches are replaced in one call to the str_replace function.
Img src: hughlashbrooke




1 comments:
This works great! However, I wanted to mention that your URL regex will not parse properly if there is a period (.) in the tweet directly after the URL. To prevent this, use this regex instead:
/[A-Za-z]+:\/\/[A-Za-z0-9-_]+\.[A-Za-z0-9-_:%&~\?\/.=]+[A-Za-z0-9-_:%&~\?\/=]/
Since the URLs given out by twitter's API are all http://t.co shortened URLs, you don't have to worry about a period being a valid character at the end of the URL.
Post a Comment