aboutsummaryrefslogtreecommitdiffstats
path: root/ntuceiba/parse.py
diff options
context:
space:
mode:
Diffstat (limited to 'ntuceiba/parse.py')
-rw-r--r--ntuceiba/parse.py119
1 files changed, 119 insertions, 0 deletions
diff --git a/ntuceiba/parse.py b/ntuceiba/parse.py
new file mode 100644
index 0000000..2686ea9
--- /dev/null
+++ b/ntuceiba/parse.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+from datetime import datetime, timedelta
+from lxml import etree
+from ntuceiba import NtuCeibaEvent
+
class nccolinfo(object):
    """Plain record describing what was read from one table cell.

    Attributes:
        red:  True when the cell content was found inside a <font> element
              (see ntuceiba_readcol); False otherwise.
        url:  value of the 'href' attribute of a link found in the cell,
              or None when no link was seen.
        data: text extracted from the cell; the empty string by default.
    """

    # Inherit from object so this is a new-style class on Python 2 as well.

    def __init__(self):
        self.red = False
        self.url = None
        self.data = ""

    def __repr__(self):
        # Debugging aid only; nothing in this module parses the result.
        return 'nccolinfo(red=%r, url=%r, data=%r)' % (
            self.red, self.url, self.data)
+
def ntuceiba_readcol(etrele):
    """Extract text, link and highlight flag from one table cell element.

    The cell is inspected in this order:

    1. If the cell has direct text, that text is the data.
    2. Otherwise, if its first child is <a>, the link's 'href' is recorded
       and the data is the link text, or the text of the link's first child
       (in which case the cell is also flagged red).
    3. Otherwise, if its first child is <font>, the cell is flagged red and
       the data is the font text, or the text/'href' of an <a> that is the
       first child of the <font>.

    Cells with no text and no children, and any other layout, yield an
    nccolinfo with its default values instead of raising IndexError.

    Args:
        etrele: an element exposing .text, len(), indexing, .tag and .get()
            (an lxml element in this module).

    Returns:
        An nccolinfo instance; its data attribute is always a string.
    """
    rval = nccolinfo()

    if etrele.text:
        rval.data = etrele.text
        return rval

    # An empty cell has no first child to look at; keep the defaults.
    if len(etrele) == 0:
        return rval

    subele = etrele[0]
    if subele.tag == 'a':
        rval.url = subele.get('href')
        if subele.text:
            rval.data = subele.text
        elif len(subele) > 0:
            # Link text is wrapped in a child element; treat it as red.
            # The child may itself have no text, so fall back to "".
            rval.data = subele[0].text or ""
            rval.red = True
    elif subele.tag == 'font':
        if subele.text:
            rval.data = subele.text
            rval.red = True
        elif len(subele) > 0 and subele[0].tag == 'a':
            link = subele[0]
            rval.url = link.get('href')
            rval.data = link.text or ""
            rval.red = True

    return rval
+
def ntuceiba_parsedate(instr):
    """Convert a date string read from the page into a datetime.

    The fields are taken from fixed character positions: year at [0:4],
    month at [5:7], day at [8:10] and hour at [11:13] (for example
    '2012-03-05 13'). The separator characters are not checked, and
    anything after the hour is ignored. Surrounding whitespace is stripped
    first so it cannot shift those positions.

    An hour of 24 is interpreted as 00:00 of the following day.

    Args:
        instr: the string to parse; may be None or empty.

    Returns:
        A naive datetime, or None when the input is None, empty, the
        placeholder '--', or cannot be turned into a valid date.
    """
    if not instr:
        return None

    instr = instr.strip()
    if instr == '--':
        return None

    try:
        dyear = int(instr[0:4])
        dmonth = int(instr[5:7])
        ddate = int(instr[8:10])
        dhour = int(instr[11:13])

        # datetime() raises ValueError for out-of-range fields, so it must
        # be inside the try block to be reported as "no date" as well.
        if dhour == 24:
            # datetime() only accepts hours 0..23; roll over to the next day.
            return datetime(dyear, dmonth, ddate, 0) + timedelta(days=1)
        return datetime(dyear, dmonth, ddate, dhour)
    except (ValueError, OverflowError):
        return None
+
+
def ntuceiba_parser(fileobj):
    """Parse a homework listing page into a list of NtuCeibaEvent objects.

    The input is parsed as UTF-8 HTML with lxml. If the stream begins with
    junk before the actual HTML, the caller must consume that part before
    passing fileobj in.

    The homework table is looked up inside the first <div id="sect_cont">.
    Every <tr> below that div except the first one (the table header) is
    read as one entry with eight columns, in this order: title, member,
    method, percent, duedate, late, subdate, comment. Rows with fewer than
    eight cells are skipped instead of raising IndexError.

    Args:
        fileobj: source accepted by lxml's etree.parse().

    Returns:
        None when the page has no <div id="sect_cont">; otherwise a list
        (possibly empty) of NtuCeibaEvent objects.
    """
    def is_late(text):
        # The column holds 'Yes' or its Chinese equivalent when late
        # submission is allowed; anything else counts as not allowed.
        return text == 'Yes' or text == u'可以'

    # (attribute name, converter) for each column, in table order.
    # A converter of None stores the cell text unchanged.
    columns = (
        ('title', None),
        ('member', None),
        ('method', None),
        ('percent', None),
        ('duedate', ntuceiba_parsedate),
        ('late', is_late),
        ('subdate', ntuceiba_parsedate),
        ('comment', None),
    )

    htmlparser = etree.HTMLParser(encoding='UTF-8')
    htmlroot = etree.parse(fileobj, htmlparser).getroot()

    # Locate the section that holds the homework table.
    hwarea = None
    for div in htmlroot.iter('div'):
        if div.get('id') == 'sect_cont':
            hwarea = div
            break
    if hwarea is None:
        return None

    # Drop the first row: it is the table header. With no rows at all the
    # slice is simply empty and an empty list is returned below.
    datarows = list(hwarea.iter('tr'))[1:]

    rval = []
    for row in datarows:
        if len(row) < len(columns):
            # Not a regular data row (too few cells); nothing to extract.
            continue

        ncdata = NtuCeibaEvent()
        # zip() stops after the eight known columns; extra cells are ignored.
        for cell, (attrname, convert) in zip(row, columns):
            sinfo = ntuceiba_readcol(cell)
            if convert is None:
                value = sinfo.data
            else:
                value = convert(sinfo.data)
            setattr(ncdata, attrname, value)
            # Report each column's red flag to the event right after the
            # column's value is stored. NOTE(review): how setred() combines
            # the flags is defined by NtuCeibaEvent - confirm there.
            ncdata.setred(sinfo.red)

        rval.append(ncdata)

    return rval