"""Extract simple lexical features from a piece of JavaScript source."""
import math
import re
from collections import Counter

import jsbeautifier
import requests

testUrl = "https://apple.com/metrics/target/scripts/1.0/at.js"

js_keywords = [
    "break", "case", "catch", "continue", "debugger", "default", "delete",
    "do", "else", "finally", "for", "function", "if", "in", "instanceof",
    "new", "return", "switch", "this", "throw", "try", "typeof", "var",
    "void", "while", "with",
]

# Seconds to wait on the remote server before treating the download as failed.
_REQUEST_TIMEOUT_SECONDS = 10

# Length of the tuple returned by parseJavascript; the error path returns this
# many zeros so callers always receive a tuple of the same shape.
_FEATURE_COUNT = 32

# A double-quoted literal containing exactly ONE character (no quantifier).
# NOTE(review): the name "string assignments" suggests every string literal was
# meant to be counted; the pattern is left unchanged so feature values stay
# comparable with previously extracted data.
_ONE_CHAR_STRING_RE = r'"((?<=\\)"|([^"]))"'

# A double-quoted literal with at least 200 characters between the quotes.
_LONG_STRING_RE = r'"((?<=\\)"|([^"])){200,}"'


def parseJavascript(context, isUrl):
    """Compute the feature tuple for a script.

    Args:
        context: Either a URL to download (when ``isUrl`` is true) or the
            JavaScript source text itself.
        isUrl: Whether ``context`` should be fetched over HTTP first.

    Returns:
        A 32-tuple, in this order: counts of ``eval(``, ``setTimeout(``,
        ``iframe``, ``unescape(``, ``escape(``, ``classid``, ``parseInt(``,
        ``fromCharCode(``, ``ActiveXObject(``, one-character string literals,
        ``concat(``, ``indexOf(``, ``substring(``, ``replace(``,
        ``document.addEventListener(``, ``attachEvent(``, ``createElement(``,
        ``getElementById(``, ``document.write(``; number of ``\\w+`` words;
        number of keyword words; number of characters; keyword/word ratio;
        entropy of the whole text; length of the longest space-delimited
        token; number of string literals of 200+ characters; length of the
        shortest space-delimited token; entropy of the longest token; number
        of space characters; characters per word; hex-value count; share of
        space characters. All measurements are taken on the beautified
        source. If the download fails, a tuple of 32 zeros is returned.
    """
    if isUrl:
        try:
            # Redirects are not followed and the status code is not checked,
            # so whatever body the server sends is what gets analysed.
            response = requests.get(
                context,
                allow_redirects=False,
                timeout=_REQUEST_TIMEOUT_SECONDS,
            )
        except (requests.RequestException, ValueError):
            # Best effort: a failed download yields an all-zero feature row
            # instead of aborting the caller. Ctrl-C is no longer swallowed.
            print("Unexpected Error")
            return (0,) * _FEATURE_COUNT
        temp_code = response.content.decode("ISO-8859-1")
    else:
        temp_code = context

    js_code = jsbeautifier.beautify(temp_code)

    def _count(pattern):
        """Return the number of non-overlapping matches of pattern."""
        return len(re.findall(pattern, js_code))

    noOfEvalFunc = _count(r"eval\(")
    noOfSetTimeOutFunc = _count(r"setTimeout\(")
    noOfiframe = _count(r"iframe")
    noOfUnescapeFunc = _count(r"unescape\(")
    # This pattern also matches inside "unescape(", so those calls are
    # included in this count as well.
    noOfEscapeFunc = _count(r"escape\(")
    noOfClassid = _count(r"classid")
    noOfParseIntFunc = _count(r"parseInt\(")
    noOfFromCharCodeFunc = _count(r"fromCharCode\(")
    noOfActiveXObjectFunc = _count(r"ActiveXObject\(")
    noOfStringAssigments = _count(_ONE_CHAR_STRING_RE)
    noOfConcatFunc = _count(r"concat\(")
    noOfIndexOfFunc = _count(r"indexOf\(")
    noOfSubstringFunc = _count(r"substring\(")
    noOfReplaceFunc = _count(r"replace\(")
    # NOTE(review): the "." in the two "document." patterns is unescaped and
    # therefore matches any character; kept as-is to preserve feature values.
    noOfEventListenerFunc = _count(r"document.addEventListener\(")
    noOfAttachEventFunc = _count(r"attachEvent\(")
    noOfCreateElementFunc = _count(r"createElement\(")
    noOfGetElementByIdFunc = _count(r"getElementById\(")
    noOfDocumentWriteFunc = _count(r"document.write\(")

    noOfWords = _count(r"\w+")
    noOfKeyWords = findKeyWords(js_code)
    noOfCharacters = len(js_code)
    ratioOfKeywordsAndWords = noOfKeyWords / noOfWords if noOfWords else 0

    entropyOfJS = Entropy(js_code)
    longest_token = findTheLongestWord(js_code)
    longestWord = len(longest_token)
    noOfLongStirngs = _count(_LONG_STRING_RE)
    shortestWord = len(findTheShortestWord(js_code))
    entropyOfLongestWord = Entropy(longest_token)

    noOfBlankSpaces = js_code.count(" ")
    avgLenOfWords = noOfCharacters / noOfWords if noOfWords else 0
    noOfHexValues = findHexNumbers(js_code)
    shareOfSpaceChar = noOfBlankSpaces / noOfCharacters if noOfCharacters else 0

    return (
        noOfEvalFunc,
        noOfSetTimeOutFunc,
        noOfiframe,
        noOfUnescapeFunc,
        noOfEscapeFunc,
        noOfClassid,
        noOfParseIntFunc,
        noOfFromCharCodeFunc,
        noOfActiveXObjectFunc,
        noOfStringAssigments,
        noOfConcatFunc,
        noOfIndexOfFunc,
        noOfSubstringFunc,
        noOfReplaceFunc,
        noOfEventListenerFunc,
        noOfAttachEventFunc,
        noOfCreateElementFunc,
        noOfGetElementByIdFunc,
        noOfDocumentWriteFunc,
        noOfWords,
        noOfKeyWords,
        noOfCharacters,
        ratioOfKeywordsAndWords,
        entropyOfJS,
        longestWord,
        noOfLongStirngs,
        shortestWord,
        entropyOfLongestWord,
        noOfBlankSpaces,
        avgLenOfWords,
        noOfHexValues,
        shareOfSpaceChar,
    )


def Entropy(string, base=2.0):
    """Return the Shannon entropy of the symbols in ``string``.

    Args:
        string: Sequence whose elements are counted (normally a ``str``).
        base: Logarithm base; the default of 2 gives the result in bits.

    Returns:
        The entropy as a number; ``0`` for an empty input.
    """
    total = len(string)
    # Counter keeps first-appearance order, so the terms are summed in the
    # same order as a dict built from the characters would give.
    probabilities = [count / total for count in Counter(string).values()]
    return -sum([pk * math.log(pk) / math.log(base) for pk in probabilities])


def findTheLongestWord(text):
    """Return the longest token of ``text`` when split on single spaces.

    Only the space character separates tokens; newlines and tabs stay inside
    them. The first token wins on ties.
    """
    return max(text.split(' '), key=len)


def findTheShortestWord(text):
    """Return the shortest token of ``text`` when split on single spaces.

    Consecutive spaces produce empty tokens, so the result is often ``''``.
    """
    return min(text.split(' '), key=len)


def findKeyWords(text):
    """Count the ``\\w+`` words in ``text`` that are JavaScript keywords.

    NOTE(review): an earlier version had a second loop that tried to subtract
    keywords found inside string literals. It compared regex group tuples with
    the keyword strings, so it never matched and never changed the result; it
    has been dropped. Keywords inside string literals are therefore counted.
    """
    keywords = set(js_keywords)
    return sum(1 for word in re.findall(r'\w+', text) if word in keywords)


def findHexNumbers(text):
    """Count hexadecimal indicators in ``text``.

    The result is the number of ``0x`` occurrences plus the number of
    ``parseInt(`` calls whose argument list ends in ``16)``.
    """
    hex_prefixes = text.count('0x')
    base16_parses = len(re.findall(r'parseInt\([^,]*,? *16\)', text))
    return hex_prefixes + base16_parses