diff --git a/README.md b/README.md
index c29fedd..500ad4a 100644
--- a/README.md
+++ b/README.md
@@ -7,16 +7,22 @@ pinyin.py
 Example:
 
     from pinyin import PinYin
-    
-    test = PinYin()
-    test.load_word()
-    test.hanzi2pinyin(string='钓鱼岛是中国的')
 
+    test = PinYin()
 
 Out:
 
     test.hanzi2pinyin(string='钓鱼岛是中国的')
-    ['diao', 'yu', 'dao', 'shi', 'zhong', 'guo', 'de']    
+    ['diao', 'yu', 'dao', 'shi', 'zhong', 'guo', 'de']
     test.hanzi2pinyin_split(string='钓鱼岛是中国的', split="-")
     diao-yu-dao-shi-zhong-guo-de
 
+    test.hanzi2pinyin(string='hello world 123')
+    out: [u'helloworld123']
+    test.hanzi2pinyin_split(string='hello world 123', split="_")
+    out: helloworld123
+
+    test.hanzi2pinyin(string='hello 中国 123')
+    out: [u'hello', 'zhong', 'guo', u'123']
+    test.hanzi2pinyin_split(string='hello 中国 123', split="_")
+    out: hello_zhong_guo_123
diff --git a/pinyin.py b/pinyin.py
index 938b727..f659894 100644
--- a/pinyin.py
+++ b/pinyin.py
@@ -16,7 +16,7 @@ class PinYin(object):
     def __init__(self, dict_file='word.data'):
         self.word_dict = {}
         self.dict_file = dict_file
-
+        self.load_word()
 
     def load_word(self):
         if not os.path.exists(self.dict_file):
@@ -31,19 +31,30 @@ def load_word(self):
                     line = f_line.split('   ')
                     self.word_dict[line[0]] = line[1]
 
-
     def hanzi2pinyin(self, string=""):
         result = []
+        alnum = []
+
         if not isinstance(string, unicode):
             string = string.decode("utf-8")
-        
-        for char in string:
+
+        for char in string.replace(' ', ''):
             key = '%X' % ord(char)
-            result.append(self.word_dict.get(key, char).split()[0][:-1].lower())
+            word = self.word_dict.get(key, char).split()[0]
+            if len(word) == 1:
+                # Pinyin entries always carry a trailing tone digit, so they are longer than 1 char
+                alnum.append(word)
+            else:
+                if alnum:
+                    words = ''.join(alnum)
+                    alnum = []
+                    result.append(words)
+                result.append(word[:-1].lower())
+        if alnum:
+            result.append(''.join(alnum))
 
         return result
 
-
     def hanzi2pinyin_split(self, string="", split=""):
         result = self.hanzi2pinyin(string=string)
         if split == "":
@@ -54,8 +65,15 @@ def hanzi2pinyin_split(self, string="", split=""):
 
 if __name__ == "__main__":
     test = PinYin()
-    test.load_word()
     string = "钓鱼岛是中国的"
     print "in: %s" % string
     print "out: %s" % str(test.hanzi2pinyin(string=string))
-    print "out: %s" % test.hanzi2pinyin_split(string=string, split="-")
+    print "out: %s" % test.hanzi2pinyin_split(string=string, split="_")
+    string = "hello world 123"
+    print "in: %s" % string
+    print "out: %s" % str(test.hanzi2pinyin(string=string))
+    print "out: %s" % test.hanzi2pinyin_split(string=string, split="_")
+    string = "hello 中国 123"
+    print "in: %s" % string
+    print "out: %s" % str(test.hanzi2pinyin(string=string))
+    print "out: %s" % test.hanzi2pinyin_split(string=string, split="_")
diff --git a/setup.py b/setup.py
index 40d5de1..7842f85 100644
--- a/setup.py
+++ b/setup.py
@@ -27,5 +27,4 @@
                 'Topic :: Software Development :: Libraries',
                 'Topic :: Software Development :: Libraries :: Python Modules'
                 ]
-                    
     )
diff --git a/word.data b/word.data
index e8bc443..1884208 100644
--- a/word.data
+++ b/word.data
@@ -1,3 +1,65 @@
+0x30    0
+0x31    1
+0x32    2
+0x33    3
+0x34    4
+0x35    5
+0x36    6
+0x37    7
+0x38    8
+0x39    9
+0x41    A
+0x42    B
+0x43    C
+0x44    D
+0x45    E
+0x46    F
+0x47    G
+0x48    H
+0x49    I
+0x4a    J
+0x4b    K
+0x4c    L
+0x4d    M
+0x4e    N
+0x4f    O
+0x50    P
+0x51    Q
+0x52    R
+0x53    S
+0x54    T
+0x55    U
+0x56    V
+0x57    W
+0x58    X
+0x59    Y
+0x5a    Z
+0x61    a
+0x62    b
+0x63    c
+0x64    d
+0x65    e
+0x66    f
+0x67    g
+0x68    h
+0x69    i
+0x6a    j
+0x6b    k
+0x6c    l
+0x6d    m
+0x6e    n
+0x6f    o
+0x70    p
+0x71    q
+0x72    r
+0x73    s
+0x74    t
+0x75    u
+0x76    v
+0x77    w
+0x78    x
+0x79    y
+0x7a    z
 3400    QIU1
 3401    TIAN3 TIAN4
 3404    KUA4