| 1 | |
|---|
| 2 | """ |
|---|
| 3 | Read flash video (.flv files) metadata |
|---|
| 4 | |
|---|
| 5 | from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/457406 |
|---|
| 6 | Submitter: Matthew Sherborne (2005/11/21) |
|---|
| 7 | """ |
|---|
| 8 | |
|---|
| 9 | from struct import unpack |
|---|
| 10 | from datetime import datetime |
|---|
| 11 | |
|---|
class FLVReader(dict):
    """
    Reads metadata from FLV files.

    The instance is a dictionary filled with the key/value pairs found in
    the first tag of the file when that tag is a script-data (META) tag.
    The FLV header fields are kept as the attributes ``signature``,
    ``version``, ``typeFlags``, ``dataOffset`` and ``extraData``.
    """

    # Tag types
    AUDIO = 8
    VIDEO = 9
    META = 18
    UNDEFINED = 0

    # AMF0 type marker that, preceded by an empty key, terminates an
    # object or mixed array.
    _AMF_OBJECT_END = 9

    class AMFObject(object):
        """Plain attribute container holding the members of an AMF object."""

        def __repr__(self):
            return 'AMFObject(%r)' % (self.__dict__,)

    def __init__(self, filename):
        """
        Pass the filename of an flv file and it will return a dictionary of
        meta data.

        The file is opened, the header and the first tag are parsed, and the
        file is closed again before the constructor returns (also when
        parsing fails), so ``self.file`` is a closed file object afterwards.

        Raises AssertionError if the file does not start with the ``FLV``
        signature, and struct.error if the file is truncated.
        """
        dict.__init__(self)
        # Lock on to the file
        self.file = open(filename, 'rb')
        try:
            self.signature = self.file.read(3)
            # Explicit raise instead of an ``assert`` statement so the check
            # survives ``python -O``; the exception type is unchanged.  The
            # bytes literal makes the comparison work on Python 3 as well.
            if self.signature != b'FLV':
                raise AssertionError('Not an flv file')
            self.version = self.readbyte()
            self.typeFlags = self.readbyte()
            self.dataOffset = self.readint()
            # A data offset that points before the current position would
            # give a negative length, and read(-n) slurps the whole file.
            extraDataLen = max(0, self.dataOffset - self.file.tell())
            self.extraData = self.file.read(extraDataLen)
            self.readtag()
        finally:
            self.file.close()

    def readtag(self):
        """
        Read one tag at the current file position.

        For a META tag the decoded metadata is merged into this dictionary;
        for the other known tag types a notice is printed.
        """
        self.readint()      # size of the previous tag
        tagType = self.readbyte()
        self.read24bit()    # size of the tag payload
        self.read24bit()    # timestamp
        self.readint()      # extended timestamp (1 byte) + stream id (3 bytes)
        if tagType == self.AUDIO:
            print("Can't handle audio tags yet")
        elif tagType == self.VIDEO:
            print("Can't handle video tags yet")
        elif tagType == self.META:
            # The first AMF value is the event name (normally 'onMetaData');
            # it has to be consumed to reach the metadata itself.
            self.readAMFData()
            metaData = self.readAMFData()
            if isinstance(metaData, self.AMFObject):
                # Metadata stored as a plain AMF object rather than a
                # mixed array: use its members.
                metaData = vars(metaData)
            # We got the meta data.
            # Our job is done.
            # We are complete
            self.update(metaData)
        elif tagType == self.UNDEFINED:
            print("Can't handle undefined tags yet")

    def readint(self):
        """Read a big-endian unsigned 32 bit integer."""
        data = self.file.read(4)
        return unpack('>I', data)[0]

    def readshort(self):
        """Read a big-endian unsigned 16 bit integer."""
        data = self.file.read(2)
        return unpack('>H', data)[0]

    def readbyte(self):
        """Read a single unsigned byte."""
        data = self.file.read(1)
        return unpack('B', data)[0]

    def read24bit(self):
        """Read a big-endian unsigned 24 bit integer."""
        b1, b2, b3 = unpack('3B', self.file.read(3))
        return (b1 << 16) + (b2 << 8) + b3

    def readAMFData(self, dataType=None):
        """
        Read one AMF0 value.

        dataType is the AMF0 type marker; when it is None the marker is read
        from the file first.  Raises KeyError for a marker that is not
        supported.
        """
        if dataType is None:
            dataType = self.readbyte()
        funcs = {
            0: self.readAMFDouble,
            1: self.readAMFBoolean,
            2: self.readAMFString,
            3: self.readAMFObject,
            5: self.readAMFNull,        # null
            6: self.readAMFNull,        # undefined
            8: self.readAMFMixedArray,
            10: self.readAMFArray,
            11: self.readAMFDate,
            12: self.readAMFLongString,
        }
        if dataType not in funcs:
            raise KeyError('Unsupported AMF data type: %r' % (dataType,))
        return funcs[dataType]()

    def readAMFDouble(self):
        """Read a big-endian 64 bit float."""
        return unpack('>d', self.file.read(8))[0]

    def readAMFBoolean(self):
        """Read a one byte boolean; only the value 1 counts as True."""
        return self.readbyte() == 1

    def readAMFNull(self):
        """Null and undefined carry no payload; both are returned as None."""
        return None

    def readAMFString(self):
        """Read a string prefixed with a 16 bit length, decoded as UTF-8."""
        size = self.readshort()
        return self.file.read(size).decode('utf-8', 'replace')

    def readAMFLongString(self):
        """Read a string prefixed with a 32 bit length, decoded as UTF-8."""
        size = self.readint()
        return self.file.read(size).decode('utf-8', 'replace')

    def _readAMFProperty(self):
        """
        Read one key/value pair of an object or mixed array.

        Returns a (key, value) tuple, or None when the pair turns out to be
        the end marker (empty key followed by the object-end type).
        """
        key = self.readAMFString()
        dataType = self.readbyte()
        if not key and dataType == self._AMF_OBJECT_END:
            return None
        return key, self.readAMFData(dataType)

    def _skipObjectEnd(self):
        """Consume an object-end marker if it is next; otherwise stay put."""
        marker = self.file.read(3)
        if marker != b'\x00\x00\x09':
            self.file.seek(-len(marker), 1)

    def readAMFObject(self):
        """
        Read an AMF object and return it as an AMFObject whose attributes
        are the object's members.

        An object has no count prefix: pairs are read until the end marker.
        """
        result = self.AMFObject()
        while True:
            pair = self._readAMFProperty()
            if pair is None:
                break
            result.__dict__[pair[0]] = pair[1]
        return result

    def readAMFMixedArray(self):
        """
        Read a mixed (associative) array and return it as a dictionary.

        At most the declared number of pairs is read, stopping early when
        the end marker shows up.
        """
        size = self.readint()
        result = {}
        for _ in range(size):
            pair = self._readAMFProperty()
            if pair is None:
                return result
            result[pair[0]] = pair[1]
        # All declared pairs were read; the end marker normally still
        # follows and must be consumed so that a value coming after this
        # array is read from the right position.
        self._skipObjectEnd()
        return result

    def readAMFArray(self):
        """Read a strict array (32 bit count, then typed values) as a list."""
        size = self.readint()
        result = []
        for _ in range(size):
            result.append(self.readAMFData())
        return result

    def readAMFDate(self):
        """
        Read an AMF date and return it as a naive local datetime.

        The value is stored as milliseconds since the epoch followed by a
        16 bit time zone field, which is read and ignored.
        """
        millis = self.readAMFDouble()
        self.readshort()    # time zone field, not used
        return datetime.fromtimestamp(millis / 1000.0)
|---|
| 128 | |
|---|
| 129 | |
|---|
if __name__ == '__main__':
    # Command line entry point: pretty-print the metadata of an FLV file.
    # The path may be given as the first argument; without one the original
    # sample file name is used.
    import sys
    from pprint import pprint
    fn = sys.argv[1] if len(sys.argv) > 1 else "KyodaiNoGilga.flv"
    x = FLVReader(fn)
    pprint(x)
|---|