|
|
- import re
- import requests
- import math
- import jsbeautifier
-
# Sample script URL; nothing in the visible part of this file references it.
testUrl = "https://apple.com/metrics/target/scripts/1.0/at.js"

# Words that findKeyWords treats as JavaScript keywords.
js_keywords = [
    "break", "case", "catch", "continue", "debugger", "default", "delete",
    "do", "else", "finally", "for", "function", "if", "in", "instanceof",
    "new", "return", "switch", "this", "throw", "try", "typeof", "var",
    "void", "while", "with",
]
-
-
def parseJavascript(context, isUrl, timeout=30):
    """Compute a 32-element tuple of lexical features for a piece of JavaScript.

    Parameters
    ----------
    context : str
        A URL to download the script from when ``isUrl`` is truthy,
        otherwise the JavaScript source text itself.
    isUrl : bool
        Selects how ``context`` is interpreted.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up. Only used
        when ``isUrl`` is truthy.

    Returns
    -------
    tuple
        32 numbers, in this order: counts of ``eval(``, ``setTimeout(``,
        ``iframe``, ``unescape(``, ``escape(``, ``classid``, ``parseInt(``,
        ``fromCharCode(``, ``ActiveXObject(``, string-literal matches,
        ``concat(``, ``indexOf(``, ``substring(``, ``replace(``,
        ``document.addEventListener(``, ``attachEvent(``,
        ``createElement(``, ``getElementById(``, ``document.write(``;
        word count; keyword count; character count; keyword/word ratio;
        entropy of the code; length of the longest space-separated token;
        long-string matches; length of the shortest space-separated token;
        entropy of the longest token; number of space characters;
        characters per word; hex-value count; share of space characters.
        All features are measured on the beautified source. If the
        download fails, "Unexpected Error" is printed and a tuple of 32
        zeros is returned.
    """
    if isUrl:
        try:
            # Redirects are not followed; the timeout keeps an unresponsive
            # server from blocking the caller indefinitely.
            r = requests.get(context, allow_redirects=False, timeout=timeout)
        except requests.exceptions.RequestException:
            print("Unexpected Error")
            return (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
        # ISO-8859-1 maps every byte value, so this decode cannot fail.
        temp_code = r.content.decode("ISO-8859-1")
    else:
        temp_code = context
    js_code = jsbeautifier.beautify(temp_code)

    def count(pattern):
        # Number of non-overlapping regex matches in the beautified code.
        return len(re.findall(pattern, js_code))

    noOfEvalFunc = count(r"eval\(")
    noOfSetTimeOutFunc = count(r"setTimeout\(")
    noOfiframe = count(r"iframe")
    noOfUnescapeFunc = count(r"unescape\(")
    # NOTE(review): this pattern also matches inside "unescape(", so every
    # unescape call is counted here as well. Left as-is to keep feature
    # values stable -- confirm whether that overlap is intended.
    noOfEscapeFunc = count(r"escape\(")
    noOfClassid = count(r"classid")
    noOfParseIntFunc = count(r"parseInt\(")
    noOfFromCharCodeFunc = count(r"fromCharCode\(")
    noOfActiveXObjectFunc = count(r"ActiveXObject\(")
    # NOTE(review): there is no quantifier on the group, so this only
    # matches a quote, exactly one non-quote character, and a quote.
    # Presumably meant to count all string literals -- confirm before
    # changing, since it alters the feature value.
    noOfStringAssigments = count(r'"((?<=\\)"|([^"]))"')
    noOfConcatFunc = count(r"concat\(")
    noOfIndexOfFunc = count(r"indexOf\(")
    noOfSubstringFunc = count(r"substring\(")
    noOfReplaceFunc = count(r"replace\(")
    # The dot is escaped so only a literal "document." prefix is counted.
    noOfEventListenerFunc = count(r"document\.addEventListener\(")
    noOfAttachEventFunc = count(r"attachEvent\(")
    noOfCreateElementFunc = count(r"createElement\(")
    noOfGetElementByIdFunc = count(r"getElementById\(")
    noOfDocumentWriteFunc = count(r"document\.write\(")

    noOfWords = count(r'\w+')
    noOfKeyWords = findKeyWords(js_code)
    noOfCharacters = len(js_code)
    ratioOfKeywordsAndWords = noOfKeyWords / noOfWords if noOfWords else 0
    entropyOfJS = Entropy(js_code)

    # Tokens here are split on single spaces only (see findTheLongestWord).
    longestToken = findTheLongestWord(js_code)
    longestWord = len(longestToken)
    noOfLongStirngs = count(r'"((?<=\\)"|([^"])){200,}"')
    shortestWord = len(findTheShortestWord(js_code))
    entropyOfLongestWord = Entropy(longestToken)

    noOfBlankSpaces = js_code.count(" ")
    avgLenOfWords = noOfCharacters / noOfWords if noOfWords else 0
    noOfHexValues = findHexNumbers(js_code)
    shareOfSpaceChar = noOfBlankSpaces / noOfCharacters if noOfCharacters else 0

    return (noOfEvalFunc, noOfSetTimeOutFunc, noOfiframe, noOfUnescapeFunc, noOfEscapeFunc, noOfClassid, noOfParseIntFunc, noOfFromCharCodeFunc, noOfActiveXObjectFunc,
            noOfStringAssigments, noOfConcatFunc, noOfIndexOfFunc, noOfSubstringFunc, noOfReplaceFunc, noOfEventListenerFunc, noOfAttachEventFunc, noOfCreateElementFunc, noOfGetElementByIdFunc,
            noOfDocumentWriteFunc, noOfWords, noOfKeyWords, noOfCharacters, ratioOfKeywordsAndWords, entropyOfJS, longestWord, noOfLongStirngs, shortestWord, entropyOfLongestWord, noOfBlankSpaces,
            avgLenOfWords, noOfHexValues, shareOfSpaceChar)
-
-
def Entropy(string, base=2.0):
    """Return the Shannon entropy of the symbols in *string*.

    Parameters
    ----------
    string : str
        Sequence whose per-character frequency distribution is measured.
    base : float, optional
        Logarithm base; the default 2.0 gives the result in bits.

    Returns
    -------
    float or int
        ``-sum(p * log(p) / log(base))`` over the distinct characters,
        where ``p`` is each character's relative frequency. Empty input
        yields 0.
    """
    total = len(string)
    if not total:
        return 0

    # One pass to tally occurrences instead of rescanning the whole input
    # once per distinct character.
    counts = {}
    for ch in string:
        counts[ch] = counts.get(ch, 0) + 1

    log_base = math.log(base)
    pkvec = [float(n) / total for n in counts.values()]
    return -sum([pk * math.log(pk) / log_base for pk in pkvec])
-
def findTheLongestWord(text):
    """Return the longest piece of *text* after splitting on single spaces.

    Only the space character separates pieces, so newlines and tabs stay
    inside them. When several pieces share the maximum length, the first
    one wins.
    """
    pieces = text.split(' ')
    best = pieces[0]  # split(' ') always yields at least one piece
    for piece in pieces[1:]:
        if len(piece) > len(best):
            best = piece
    return best
-
def findTheShortestWord(text):
    """Return the shortest piece of *text* after splitting on single spaces.

    Only the space character separates pieces, so consecutive spaces
    produce empty pieces, which then count as the shortest. When several
    pieces share the minimum length, the first one wins.
    """
    pieces = text.split(' ')
    best = pieces[0]  # split(' ') always yields at least one piece
    for piece in pieces[1:]:
        if len(piece) < len(best):
            best = piece
    return best
-
- """def findLongStings(text):
- longs = []
- temp = re.findall(r'var = .*\".*\"', text)
- for string in temp:
- if len(string) > 200:
- longs.append(string)"""
-
def findKeyWords(text):
    """Count the ``\\w+`` tokens in *text* that appear in ``js_keywords``.

    Every occurrence counts, including keywords that sit inside string
    literals or comments.

    NOTE(review): the previous version had a second pass that looked like
    an attempt to subtract keywords found inside double-quoted strings.
    It never subtracted anything: its regex has two groups, so
    ``re.findall`` returned tuples, which are never equal to a keyword
    string. That no-op pass is removed here and the returned count is
    unchanged. Confirm whether the subtraction is actually wanted before
    implementing it, since it would change the feature value.
    """
    keywords = set(js_keywords)  # constant-time membership per token
    return sum(1 for word in re.findall(r'\w+', text) if word in keywords)
-
def findHexNumbers(text):
    """Count hexadecimal hints in *text*.

    The result is the number of ``0x`` occurrences plus the number of
    ``parseInt(...)`` calls whose text ends in ``16)``, as matched by the
    regex below.
    """
    hex_prefixes = text.count('0x')
    radix16_calls = re.findall(r'parseInt\([^,]*,? *16\)', text)
    return hex_prefixes + len(radix16_calls)
-
- #print(parseJavascript("https://ajax.googleapis.com/ajax/libs/jquery/3.2.1/jquery.min.js"))
|