新浪微博地址url字符与mid的相互转换原理及代码实现(php+python)

参考:http://qinxuye.me/article/mid-and-url-in-sina-weibo/  http://fengbin.me/2011/06/weibo-mid-url-str/  http://blog.sina.com.cn/s/blog_4a238ec201012pnb.html


  新浪微博的URL都形如:http://weibo.com/2196734667/ArOPrxOnF 这样由三部分组成: 域名/uid/mid   第一部分为新浪微博的域名, 第二部分为博主uid, 第三部分为一串貌似随机的字符串(实际上是经过编码的微博mid). 在开始计算之前有必要说明一下,什么是base62编码。它实际上就是十进制和62进制的互换。对于62进制,从0数到9以后,10用小写字母a表示,接着数完26个字母,到z为35,然后36为大写字母A,一直到61为大写字母Z。所以,我们可以实现十进制数字base62编码的encode和decode。 以 ArOPrxOnF 为例,它的计算其实也很简单,从后向前每四个字符一组,就得到: A rOPr xOnF 将每个字符串用base62编码来decode,就可以得到它们的十进制数字分别为: 36 6630245 8058491 将它们拼起来就可以得到mid为:“3666302458058491”。这里要强调的是:对于除了开头的字符串,如果得到的十进制数字不足7位,需要在前面补足0。比如得到的十进制数分别为:36,30245,8906190,则需要在30245前面添上两个0。 base62编码   Python代码:


# Base62 digit characters ordered by value:
# "0"-"9" are 0-9, "a"-"z" are 10-35, "A"-"Z" are 36-61.
ALPHABET = (
    "0123456789"
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
)

def base62_encode(num, alphabet=ALPHABET):
    """Encode a non-negative integer in base ``len(alphabet)``.

    Arguments:
    - `num`: The non-negative integer to encode
    - `alphabet`: The digit characters to use, ordered by value; its
      length is the base

    Returns the encoded string, most significant digit first. Zero is
    encoded as the first character of `alphabet`.

    Raises ValueError if `num` is negative.
    """
    if num < 0:
        # Floor division never brings a negative number to zero
        # (-1 // 62 == -1), so the digit loop below would never terminate.
        raise ValueError("cannot encode a negative number: %r" % (num,))
    if num == 0:
        return alphabet[0]
    base = len(alphabet)
    digits = []
    while num:
        rem = num % base
        num = num // base
        # Digits come out least significant first; reversed afterwards.
        digits.append(alphabet[rem])
    digits.reverse()
    return ''.join(digits)

def base62_decode(string, alphabet=ALPHABET):
    """Decode a string of base ``len(alphabet)`` digits into an integer.

    Arguments:
    - `string`: The encoded string, most significant digit first
    - `alphabet`: The digit characters used for encoding, ordered by value

    Returns the decoded integer; an empty string decodes to 0.

    Raises ValueError if `string` contains a character that is not in
    `alphabet`.
    """
    base = len(alphabet)
    num = 0
    for char in string:
        try:
            digit = alphabet.index(char)
        except ValueError:
            raise ValueError(
                "invalid character %r for a base %d alphabet" % (char, base))
        # Horner's scheme: shift the accumulated value one digit to the
        # left, then add the new digit.
        num = num * base + digit
    return num

url和mid 互转 python代码:


def url_to_mid(url):
    """Convert the base62 string from a weibo URL into the numeric mid.

    The string is split into groups of 4 characters counted from the end.
    Each group is decoded with `base62_decode`, and every group except the
    leading one is left-padded with zeros to 7 decimal digits before the
    groups are concatenated.

    Arguments:
    - `url`: The encoded string, e.g. "ArOPrxOnF"

    Returns the mid as an integer, e.g. 3666302458058491.

    Raises ValueError if `url` is empty.
    """
    url = str(url)
    if not url:
        raise ValueError("url must not be empty")
    parts = []
    # `end` is the exclusive end index of the current group; stepping by
    # -4 visits the groups from the last one to the leading one.
    for end in range(len(url), 0, -4):
        start = max(end - 4, 0)
        digits = str(base62_decode(url[start:end]))
        if start > 0:
            # Not the leading group: restore zeros lost by int conversion.
            digits = '0' * (7 - len(digits)) + digits
        parts.append(digits)
    parts.reverse()
    return int(''.join(parts))

def mid_to_url(midint):
    """Convert a numeric weibo mid into the base62 string used in URLs.

    The decimal digits are split into groups of 7 counted from the end.
    Each group is encoded with `base62_encode`, and every group except the
    leading one is left-padded with "0" to 4 characters before the groups
    are concatenated.

    Arguments:
    - `midint`: The mid, as an integer or a string of decimal digits

    Returns the encoded string, e.g. "ArOPrxOnF" for 3666302458058491.

    Raises ValueError if `midint` is negative.
    """
    digits = str(midint)
    if digits.startswith('-'):
        raise ValueError("mid must not be negative: %r" % (midint,))
    parts = []
    # `end` is the exclusive end index of the current group; stepping by
    # -7 visits the groups from the last one to the leading one.
    for end in range(len(digits), 0, -7):
        start = max(end - 7, 0)
        encoded = base62_encode(int(digits[start:end]))
        if start > 0:
            # Not the leading group: keep every group 4 characters wide.
            encoded = '0' * (4 - len(encoded)) + encoded
        parts.append(encoded)
    parts.reverse()
    return ''.join(parts)

PHP代码:


/**
 * Convert the base-62 string from a weibo URL into the decimal mid.
 *
 * The string is split into groups of 4 characters counted from the end.
 * Each group is decoded by str62to10(), which already left-pads its result
 * to 7 digits, and the groups are concatenated. Any number of groups is
 * supported, not only the three-group case.
 *
 * @param string $url Encoded string, e.g. 'ArOPrxOnF'
 * @return string Decimal mid with leading zeros removed
 */
function sinaWburl2ID($url) {
	$url = (string) $url;
	$int10 = '';
	// $end is the exclusive end offset of the current group; walk backwards
	// so that only the leading group can be shorter than 4 characters.
	for ($end = strlen($url); $end > 0; $end -= 4) {
		$start = max(0, $end - 4);
		$int10 = str62to10(substr($url, $start, $end - $start)) . $int10;
	}
	// The leading group was padded to 7 digits as well; drop that padding.
	return ltrim($int10, '0');
}
/**
 * Decode a base-62 string into a decimal number.
 *
 * @param string $str62 Base-62 digits, most significant first
 * @return string Decimal value, left-padded with "0" to at least 7 characters
 */
function str62to10($str62) { // base 62 to base 10
	$digits = str_split($str62);
	$length = strlen($str62);
	$total = 0;
	// The exponent counts down from the most significant position to 0.
	$exponent = $length - 1;
	for ($pos = 0; $pos < $length; $pos++, $exponent--) {
		$total += pow(62, $exponent) * str62keys($digits[$pos]);
	}
	return str_pad($total, 7, "0", STR_PAD_LEFT);
}

/**
 * Look up the numeric value of one base-62 digit character.
 *
 * @param string $ks A single digit character
 * @return int|false Position of the character in the table (0-61), or
 *                   false when array_search() does not find it
 */
function str62keys($ks) // base-62 dictionary
{
	// One single-character entry per digit: 0-9, a-z, A-Z.
	$table = str_split('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ');
	return array_search($ks, $table);
}
echo  sinaWburl2ID('ArOPrxOnF'); // usage example: prints the decimal mid for this encoded string

/**
 * Convert a decimal weibo mid into the base-62 string used in URLs.
 *
 * The digits are processed in groups of 7 counted from the end. intTo62()
 * returns each group's base-62 digits least significant first, so every
 * group is padded on the right to 4 characters and the concatenation is
 * reversed once at the end.
 *
 * @param int|string $mid Decimal mid
 * @return string Encoded string with leading "0" characters removed
 */
function midToStr($mid) {
	settype($mid, 'string');
	$encoded = '';

	// Reversing first makes str_split() cut the groups from the end of $mid.
	foreach (str_split(strrev($mid), 7) as $reversedGroup) {
		$encoded .= str_pad(intTo62(strrev($reversedGroup)), 4, "0");
	}

	return ltrim(strrev($encoded), '0');
}

/**
 * Return the base-62 digit character for a numeric value.
 *
 * @param int $key Digit value used as the index into the table
 * @return string The digit character stored at that index
 */
function str62keys_int_62($key) // base-62 dictionary
{
	// One single-character entry per digit: 0-9, a-z, A-Z.
	$table = str_split('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ');
	return $table[$key];
}

/**
 * Convert a non-negative decimal number into base-62 digits.
 *
 * The digits are returned least significant first; the caller is expected
 * to reverse them. Zero yields an empty string.
 *
 * @param int|string $int10 Non-negative decimal value
 * @return string Base-62 digits, least significant first
 * @throws InvalidArgumentException If $int10 is negative
 */
function intTo62($int10) {
	if ($int10 < 0) {
		// floor(-1 / 62) is -1, so a negative value would never reach 0
		// and the loop below would not terminate.
		throw new InvalidArgumentException('intTo62 expects a non-negative number');
	}
	$s62 = '';
	while ($int10 != 0) {
		$s62 .= str62keys_int_62($int10 % 62);
		$int10 = floor($int10 / 62);
	}

	return $s62;
}
echo midToStr($mid);// usage example. NOTE(review): $mid is not assigned in the visible code - set it before this call

发表评论

This site uses Akismet to reduce spam. Learn how your comment data is processed.