当前位置：Gxlcms > Python > python访问纯真IP数据库的代码

python访问纯真IP数据库的代码

时间：2021-07-01 10:21:17 帮助过：22人阅读

核心代码：

#!/usr/bin/env python 
# -*- coding: utf-8 -*- 
from bisect import bisect 
# _LIST1 holds the integer start address of every loaded range so that it can
# be searched directly with bisect; _LIST2 holds the matching
# (start, end, address) tuples at the same positions.
_LIST1, _LIST2 = [], []
# Set to True once ipdata.txt has been loaded, so the file is parsed only once.
_INIT = False


def ip2int(ip_str):
    """Convert a dotted IPv4 string such as ``'1.2.3.4'`` to an integer.

    Each dot-separated field is folded in as one more low-order byte, so
    ``'1.2.3.4'`` becomes ``0x01020304``.  Byte strings are accepted as
    well, because ``_init`` reads its data file in binary mode.

    Raises:
        ValueError: if any field is not a decimal integer.
    """
    if isinstance(ip_str, bytes):
        ip_str = ip_str.decode('ascii')
    value = 0
    # A plain loop instead of reduce(): reduce is not a builtin on Python 3.
    for octet in ip_str.split('.'):
        value = (value << 8) + int(octet)
    return value
def _init():
    """Load ``ipdata.txt`` into the module-level lookup tables, once.

    Every non-blank line is split on whitespace: the first two fields are
    the start and end address of a range, and the remaining fields, joined
    with single spaces, are the location text.  The start address is
    appended to ``_LIST1`` and the ``(start, end, address)`` tuple to
    ``_LIST2``.  Calls made after a successful load return immediately.

    NOTE(review): ``ip_from`` searches ``_LIST1`` with bisect, so the file
    is presumably sorted by start address -- this is not checked here.

    Raises:
        IOError/OSError: if ``ipdata.txt`` cannot be opened.
        ValueError: if a non-blank line has fewer than two fields or an
            address field is not numeric.
    """
    global _INIT
    if _INIT:
        return
    # Parse into local lists first so that a failure half-way through the
    # file cannot leave partial data that a later retry would duplicate.
    starts, records = [], []
    # Binary mode as before: the location text is kept as the raw bytes
    # stored in the file.  The with-block guarantees the handle is closed.
    with open('ipdata.txt', 'rb') as data_file:
        for line in data_file:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines (e.g. a trailing newline)
            first, last = fields[:2]
            first, last = ip2int(first), ip2int(last)
            starts.append(first)
            records.append((first, last, b' '.join(fields[2:])))
    _LIST1.extend(starts)
    _LIST2.extend(records)
    _INIT = True
def ip_from(ip):
    """Return the location text recorded for a dotted IPv4 address string.

    The lookup tables are loaded on first use via ``_init()``.  When no
    loaded range contains the address, the unicode message
    ``u'unknown ip address <ip>'`` is returned instead.
    """
    _init()
    target = ip2int(ip)
    # bisect() is bisect_right: it returns how many range starts are
    # <= target, so the only range that can contain target is idx - 1.
    idx = bisect(_LIST1, target)
    # idx == 0 means target lies below the first range (previously an
    # AssertionError).  idx == len(_LIST1) is a valid hit on the last range
    # and must be checked like any other (previously reported as unknown).
    if idx > 0:
        frm, to, addr = _LIST2[idx - 1]
        if frm <= target <= to:
            return addr
    return u'unknown ip address %s' % ip
if __name__ == '__main__':
    # Demo: look up a few sample addresses and print what ip_from returns.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    for sample in ('115.238.54.106', '220.181.29.160',
                   '115.238.54.107', '8.8.8.8'):
        print(ip_from(sample))

代码打包下载 http://xiazai.bitsCN.com/201105/yuanma/ipaddress.7z

接下来为大家分享更完美的代码：

#!/usr/bin/env python
# coding: utf-8
 
'''用Python脚本查询纯真IP库
 
QQWry.Dat的格式如下:
 
+----------+
| 文件头 | (8字节)
+----------+
| 记录区 | （不定长）
+----------+
| 索引区 | （大小由文件头决定）
+----------+
 
文件头：4字节开始索引偏移值+4字节结尾索引偏移值
 
记录区： 每条IP记录格式 ==> IP地址[国家信息][地区信息]
 
  对于国家记录，可以有三种表示方式：
 
    字符串形式(IP记录第5字节不等于0x01和0x02的情况)，
    重定向模式1(第5字节为0x01),则接下来3字节为国家信息存储地的偏移值
    重定向模式2(第5字节为0x02),则接下来3字节为国家信息的偏移值,其后紧跟地区信息
 
  对于地区记录，可以有两种表示方式： 字符串形式和重定向
 
  最后一条规则：重定向模式1的国家记录后不能跟地区记录
 
索引区： 每条索引记录格式 ==> 4字节起始IP地址 + 3字节指向IP记录的偏移值
 
  索引区的IP和它指向的记录区一条记录中的IP构成一个IP范围。查询信息是这个
  范围内IP的信息
 
'''
 
import sys
import socket
from struct import pack, unpack
 
class IPInfo(object):
  '''QQWry.Dat数据库查询功能集合
  '''
  def __init__(self, dbname):
    ''' 初始化类，读取数据库内容为一个字符串，
    通过开始8字节确定数据库的索引信息'''
 
    self.dbname = dbname
    # f = file(dbname, 'r')
 
    # Demon注：在Windows下用'r'会有问题，会把\r\n转换成\n
    # 详见http://demon.tw/programming/python-open-mode.html
    # 还有Python文档中不提倡用file函数来打开文件，推荐用open
    f = open(dbname, 'rb')
 
    self.img = f.read()
    f.close()
 
    # QQWry.Dat文件的开始8字节是索引信息,前4字节是开始索引的偏移值，
    # 后4字节是结束索引的偏移值。
    # (self.firstIndex, self.lastIndex) = unpack('II', self.img[:8])
 
    # Demon注：unpack默认使用的endian是和机器有关的
    # Intel x86和AMD64(x86-64)是little-endian
    # Motorola 68000和PowerPC G5是big-endian
    # 而纯真数据库全部采用了little-endian字节序
    # 所以在某些big-endian的机器上原代码会出错
    (self.firstIndex, self.lastIndex) = unpack('<ii', self.img[:8])="" #="" 每条索引长7字节，这里得到索引总个数="" self.indexcount="(self.lastIndex" -="" self.firstindex)="" 7="" +="" 1="" def="" getstring(self,="" offset="0):" '''="" 读取字符串信息，包括"国家"信息和"地区"信息="" qqwry.dat的记录区每条信息都是一个以'\0'结尾的字符串'''="" o2="self.img.find('\0'," offset)="" #return="" self.img[offset:o2]="" 有可能只有国家信息没有地区信息，="" gb2312_str="self.img[offset:o2]" try:="" utf8_str="unicode(gb2312_str,'gb2312').encode('utf-8')" except:="" return="" '未知'="" getlong3(self,="" '''qqwry.dat中的偏移记录都是3字节，本函数取得3字节的偏移量的常规表示="" qqwry.dat使用“字符串“存储这些值'''="" s="self.img[offset:" 3]="" unpack用一个'i'作为format，后面的字符串必须是4字节="" unpack('i',="" s)[0]="" demon注：和上面一样，强制使用little-endian="" unpack('<i',="" getareaaddr(self,="" 通过给出偏移值，取得区域信息字符串，'''="" byte="ord(self.img[offset])" if="" or="" 2:="" 第一个字节为1或者2时，取得2-4字节作为一个偏移量调用自己="" p="self.getLong3(offset" 1)="" self.getareaaddr(p)="" else:="" self.getstring(offset)="" getaddr(self,="" offset,="" ip="0):" img="self.img" o="offset" 1:="" 重定向模式1="" [ip][0x01][国家和地区信息的绝对偏移地址]="" 使用接下来的3字节作为偏移量调用字节取得信息="" self.getaddr(self.getlong3(o="" 1))="" 重定向模式2="" [ip][0x02][国家信息的绝对偏移][地区信息字符串]="" 使用国家信息偏移量调用自己取得字符串信息="" carea="self.getAreaAddr(self.getLong3(o" 跳过前4字节取字符串作为地区信息="" aarea="self.getAreaAddr(o)" (carea,="" aarea)="" !="1" and="" 最简单的ip记录形式，[ip][国家信息][地区信息]="" 重定向模式1有种情况就是偏移量指向包含国家和地区信息两个字符串="" 即偏移量指向的第一个字节不是1或2,就使用这里的分支="" 简单地说：取连续取两个字符串！="" #o="" 我们已经修改carea为utf-8字符编码了，len取得的长度会有变，="" 用下面方法得到offset="" "?":="" "信":="" "[":="" find(self,="" ip,="" l,="" r):="" 使用二分法查找网络字节编码的ip地址的索引记录'''="" r="" l="" <="1:" m="(l" r)="" 2="" *="" #new_ip="unpack('I'," self.img[o:="" o+4])[0]="" new_ip="unpack('<I'," self.find(ip,="" m)="" m,="" getipaddr(self,="" ip):="" 调用其他函数，取得信息！'''="" 使用网络字节编码ip地址="" socket.inet_aton(ip))[0]="" 使用="" self.find="" 函数查找ip的索引偏移="" i="self.find(ip," 0,="" 得到索引记录="" 索引记录格式是：="" 前4字节ip信息+3字节指向ip记录信息的偏移量="" 这里就是使用后3字节作为偏移量得到其常规表示（qqwry.dat用字符串表示值）="" 4)="" ip记录偏移值+4可以丢弃前4字节的ip地址信息。="" (c,="" 
a)="self.getAddr(o2" output(self,="" first,="" last):="" for="" in="" range(first,="" self.img[o:o+4])[0]))="" print="" "%s="" %d="" %s="" %s"="" %="" (ip,="" c,="" getip(ip):="" import="" os="" _localdir="os.path.dirname(__file__)" _curpath="os.path.normpath(os.path.join(os.getcwd(),_localDir))" curpath="_curpath" c+a="" main():="" os.path.exists(sys.argv[1]):="" line="" open(sys.argv[1],"r").readlines():="" demon注：如果是在windows命令行中运行把编码转回gb2312以避免乱码="" sys.platform="=" 'win32':="" c="unicode(c," 'utf-8').encode('gb2312')="" a="unicode(a," '%s="" %s'="" (line,="" (sys.argv[1],="" __name__="=" '__main__':="" main()<="" script=""></ii',>

用Python脚本查询纯真IP库QQWry.dat（Demon修改版）

由于要用 Python 读取一个和纯真IP数据库 QQWry.dat 格式差不多的 IPv6 数据库，所以在网上搜索了一下，在 LinuxTOY 看到了一个 Python 脚本，发现有一些小小的问题，于是修改了一下。

#!/usr/bin/env python
# coding: utf-8
# from: http://linuxtoy.org/files/pyip.py
# Blog: http://linuxtoy.org/archives/python-ip.html
# Modified by Demon
# Blog: http://demon.tw/programming/python-qqwry-dat.html
'''用Python脚本查询纯真IP库
QQWry.Dat的格式如下:
+----------+
| 文件头 | (8字节)
+----------+
| 记录区 | （不定长）
+----------+
| 索引区 | （大小由文件头决定）
+----------+
文件头：4字节开始索引偏移值+4字节结尾索引偏移值
记录区： 每条IP记录格式 ==> IP地址[国家信息][地区信息]
  对于国家记录，可以有三种表示方式：
    字符串形式(IP记录第5字节不等于0x01和0x02的情况)，
    重定向模式1(第5字节为0x01),则接下来3字节为国家信息存储地的偏移值
    重定向模式2(第5字节为0x02),则接下来3字节为国家信息的偏移值,其后紧跟地区信息
  
  对于地区记录，可以有两种表示方式： 字符串形式和重定向
  最后一条规则：重定向模式1的国家记录后不能跟地区记录
索引区： 每条索引记录格式 ==> 4字节起始IP地址 + 3字节指向IP记录的偏移值
  索引区的IP和它指向的记录区一条记录中的IP构成一个IP范围。查询信息是这个
  范围内IP的信息
'''
import sys
import socket
from struct import pack, unpack
class IPInfo(object):
  '''QQWry.Dat数据库查询功能集合
  '''
  def __init__(self, dbname):
    ''' 初始化类，读取数据库内容为一个字符串，
    通过开始8字节确定数据库的索引信息'''
    
    self.dbname = dbname
    # f = file(dbname, 'r')
    # Demon注：在Windows下用'r'会有问题，会把\r\n转换成\n
    # 详见http://demon.tw/programming/python-open-mode.html
    # 还有Python文档中不提倡用file函数来打开文件，推荐用open
    f = open(dbname, 'rb')
    self.img = f.read()
    f.close()
    # QQWry.Dat文件的开始8字节是索引信息,前4字节是开始索引的偏移值，
    # 后4字节是结束索引的偏移值。
    # (self.firstIndex, self.lastIndex) = unpack('II', self.img[:8])
    # Demon注：unpack默认使用的endian是和机器有关的
    # Intel x86和AMD64(x86-64)是little-endian
    # Motorola 68000和PowerPC G5是big-endian
    # 而纯真数据库全部采用了little-endian字节序
    # 所以在某些big-endian的机器上原代码会出错
    (self.firstIndex, self.lastIndex) = unpack('<ii', self.img[:8])="" #="" 每条索引长7字节，这里得到索引总个数="" self.indexcount="(self.lastIndex" -="" self.firstindex)="" 7="" +="" 1="" def="" getstring(self,="" offset="0):" '''="" 读取字符串信息，包括"国家"信息和"地区"信息="" qqwry.dat的记录区每条信息都是一个以'\0'结尾的字符串'''="" o2="self.img.find('\0'," offset)="" #return="" self.img[offset:o2]="" 有可能只有国家信息没有地区信息，="" gb2312_str="self.img[offset:o2]" try:="" utf8_str="unicode(gb2312_str,'gb2312').encode('utf-8')" except:="" return="" '未知'="" getlong3(self,="" '''qqwry.dat中的偏移记录都是3字节，本函数取得3字节的偏移量的常规表示="" qqwry.dat使用“字符串“存储这些值'''="" s="self.img[offset:" 3]="" unpack用一个'i'作为format，后面的字符串必须是4字节="" unpack('i',="" s)[0]="" demon注：和上面一样，强制使用little-endian="" unpack('<i',="" getareaaddr(self,="" 通过给出偏移值，取得区域信息字符串，'''="" byte="ord(self.img[offset])" if="" or="" 2:="" 第一个字节为1或者2时，取得2-4字节作为一个偏移量调用自己="" p="self.getLong3(offset" 1)="" self.getareaaddr(p)="" else:="" self.getstring(offset)="" getaddr(self,="" offset,="" ip="0):" img="self.img" o="offset" 1:="" 重定向模式1="" [ip][0x01][国家和地区信息的绝对偏移地址]="" 使用接下来的3字节作为偏移量调用字节取得信息="" self.getaddr(self.getlong3(o="" 1))="" 重定向模式2="" [ip][0x02][国家信息的绝对偏移][地区信息字符串]="" 使用国家信息偏移量调用自己取得字符串信息="" carea="self.getAreaAddr(self.getLong3(o" 跳过前4字节取字符串作为地区信息="" aarea="self.getAreaAddr(o)" (carea,="" aarea)="" !="1" and="" 最简单的ip记录形式，[ip][国家信息][地区信息]="" 重定向模式1有种情况就是偏移量指向包含国家和地区信息两个字符串="" 即偏移量指向的第一个字节不是1或2,就使用这里的分支="" 简单地说：取连续取两个字符串！="" #o="" 我们已经修改carea为utf-8字符编码了，len取得的长度会有变，="" 用下面方法得到offset="" find(self,="" ip,="" l,="" r):="" 使用二分法查找网络字节编码的ip地址的索引记录'''="" r="" l="" <="1:" m="(l" r)="" 2="" *="" #new_ip="unpack('I'," self.img[o:="" o+4])[0]="" new_ip="unpack('<I'," self.find(ip,="" m)="" m,="" getipaddr(self,="" ip):="" 调用其他函数，取得信息！'''="" 使用网络字节编码ip地址="" socket.inet_aton(ip))[0]="" 使用="" self.find="" 函数查找ip的索引偏移="" i="self.find(ip," 0,="" 得到索引记录="" 索引记录格式是：="" 前4字节ip信息+3字节指向ip记录信息的偏移量="" 这里就是使用后3字节作为偏移量得到其常规表示（qqwry.dat用字符串表示值）="" 4)="" ip记录偏移值+4可以丢弃前4字节的ip地址信息。="" (c,="" a)="self.getAddr(o2" 
output(self,="" first,="" last):="" for="" in="" range(first,="" self.img[o:o+4])[0]))="" print="" "%s="" %d="" %s="" %s"="" %="" (ip,="" c,="" main():="" demon注：如果是在windows命令行中运行把编码转回gb2312以避免乱码="" sys.platform="=" 'win32':="" c="unicode(c," 'utf-8').encode('gb2312')="" a="unicode(a," '%s="" %s'="" (sys.argv[1],="" __name__="=" '__main__':="" main()="" changelog="" 时间：2009年5月29日="" 1.="" 工具下面网友的建议，修改"o="" 1"="" http:="" linuxtoy.org="" archives="" python-ip.html#comment-113960="" 因为这个时候我已经把得到的字符串变成utf-8编码了，长度会有变化！<="" script=""></ii',>

python访问纯真IP数据库的代码

人气教程排行