# yigitcolakoglu/KulYutmaz

import math
import re
from collections import Counter

import jsbeautifier
import requests
# Sample script URL kept for manual experiments.
testUrl = "https://apple.com/metrics/target/scripts/1.0/at.js"

# Reserved words that findKeyWords() looks for among the word tokens of a script.
js_keywords = [
    "break", "case", "catch", "continue", "debugger", "default", "delete",
    "do", "else", "finally", "for", "function", "if", "in", "instanceof",
    "new", "return", "switch", "this", "throw", "try", "typeof", "var",
    "void", "while", "with",
]

def parseJavascript(context, isUrl):
    """Compute a 32-element tuple of lexical features for a piece of JavaScript.

    Args:
        context: The URL to download when ``isUrl`` is true, otherwise the
            JavaScript source text itself.
        isUrl: Whether ``context`` must be fetched with ``requests.get``
            (redirects are not followed; the body is decoded as ISO-8859-1).

    Returns:
        A tuple of 32 values in this fixed order: counts of ``eval(``,
        ``setTimeout(``, ``iframe``, ``unescape(``, ``escape(``, ``classid``,
        ``parseInt(``, ``fromCharCode(``, ``ActiveXObject(``, one-character
        quoted strings, ``concat(``, ``indexOf(``, ``substring(``,
        ``replace(``, ``document.addEventListener(``, ``attachEvent(``,
        ``createElement(``, ``getElementById(``, ``document.write(``; then
        word count, keyword count, character count, keyword/word ratio,
        entropy of the code, length of the longest space-separated token,
        count of long quoted strings, length of the shortest token, entropy
        of the longest token, number of spaces, characters per word, number
        of hex indicators and the share of space characters.
        If the download raises, "Unexpected Error" is printed and a tuple of
        32 zeros is returned instead.
    """
    if isUrl:
        try:
            r = requests.get(context, allow_redirects=False)
        except Exception:
            # Best-effort on purpose: any download failure yields the all-zero
            # vector. ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            print("Unexpected Error")
            return (0,) * 32
        temp_code = r.content.decode("ISO-8859-1")
    else:
        temp_code = context
    js_code = jsbeautifier.beautify(temp_code)

    def count(pattern):
        """Number of non-overlapping matches of ``pattern`` in the code."""
        return len(re.findall(pattern, js_code))

    # Patterns are raw-string equivalents of the originals, so every count is
    # unchanged while the invalid "\(" escape sequences are gone.
    noOfEvalFunc = count(r"eval\(")
    noOfSetTimeOutFunc = count(r"setTimeout\(")
    noOfiframe = count(r"iframe")
    noOfUnescapeFunc = count(r"unescape\(")
    # NOTE(review): this also matches every "unescape(" occurrence.
    noOfEscapeFunc = count(r"escape\(")
    noOfClassid = count(r"classid")
    noOfParseIntFunc = count(r"parseInt\(")
    noOfFromCharCodeFunc = count(r"fromCharCode\(")
    noOfActiveXObjectFunc = count(r"ActiveXObject\(")
    noOfStringAssigments = count(r'"((?<=\\)"|([^"]))"')
    noOfConcatFunc = count(r"concat\(")
    noOfIndexOfFunc = count(r"indexOf\(")
    noOfSubstringFunc = count(r"substring\(")
    noOfReplaceFunc = count(r"replace\(")
    # NOTE(review): the unescaped "." in the two "document." patterns matches
    # any character; kept as-is so the feature values do not shift.
    noOfEventListenerFunc = count(r"document.addEventListener\(")
    noOfAttachEventFunc = count(r"attachEvent\(")
    noOfCreateElementFunc = count(r"createElement\(")
    noOfGetElementByIdFunc = count(r"getElementById\(")
    noOfDocumentWriteFunc = count(r"document.write\(")

    noOfWords = count(r'\w+')
    noOfKeyWords = findKeyWords(js_code)
    noOfCharacters = len(js_code)
    ratioOfKeywordsAndWords = noOfKeyWords / noOfWords if noOfWords else 0
    entropyOfJS = Entropy(js_code)

    # Look the extreme tokens up once and reuse them for length and entropy.
    longest_token = findTheLongestWord(js_code)
    longestWord = len(longest_token)
    noOfLongStirngs = count(r'"((?<=\\)"|([^"])){200,}"')
    shortestWord = len(findTheShortestWord(js_code))
    entropyOfLongestWord = Entropy(longest_token)

    noOfBlankSpaces = js_code.count(" ")
    avgLenOfWords = noOfCharacters / noOfWords if noOfWords else 0
    noOfHexValues = findHexNumbers(js_code)
    shareOfSpaceChar = noOfBlankSpaces / noOfCharacters if noOfCharacters else 0

    return (
        noOfEvalFunc, noOfSetTimeOutFunc, noOfiframe, noOfUnescapeFunc,
        noOfEscapeFunc, noOfClassid, noOfParseIntFunc, noOfFromCharCodeFunc,
        noOfActiveXObjectFunc, noOfStringAssigments, noOfConcatFunc,
        noOfIndexOfFunc, noOfSubstringFunc, noOfReplaceFunc,
        noOfEventListenerFunc, noOfAttachEventFunc, noOfCreateElementFunc,
        noOfGetElementByIdFunc, noOfDocumentWriteFunc, noOfWords,
        noOfKeyWords, noOfCharacters, ratioOfKeywordsAndWords, entropyOfJS,
        longestWord, noOfLongStirngs, shortestWord, entropyOfLongestWord,
        noOfBlankSpaces, avgLenOfWords, noOfHexValues, shareOfSpaceChar,
    )

def Entropy(string, base=2.0):
    """Return the Shannon entropy of the symbols in ``string``.

    Args:
        string: The sequence whose symbol distribution is measured.
        base: Logarithm base for the result (2.0 gives bits).

    Returns:
        ``-sum(p * log(p) / log(base))`` over the relative frequency ``p`` of
        each distinct symbol; 0.0 for empty input.
    """
    total = len(string)
    if not total:
        return 0.0
    log_base = math.log(base)
    # Counter tallies every symbol in one pass and, like the dict it replaces,
    # iterates in first-seen order, so the summation order is unchanged.
    return -sum(
        (float(n) / total) * math.log(float(n) / total) / log_base
        for n in Counter(string).values()
    )
def findTheLongestWord(text):
    """Return the longest piece of ``text`` when split on single spaces.

    Only the space character separates pieces, so a piece may contain
    newlines or tabs. On a tie the first piece wins; empty text yields "".
    """
    return max(text.split(' '), key=len)
def findTheShortestWord(text):
    """Return the shortest piece of ``text`` when split on single spaces.

    Consecutive spaces produce empty pieces, so the result can be "". On a
    tie the first piece wins; empty text yields "".
    """
    return min(text.split(' '), key=len)
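# Disabled draft kept for reference: it lives in a string literal, so it is
# never executed, and it has no return statement.
"""def findLongStings(text):
    longs = []
    temp = re.findall(r'var = .*\".*\"', text)
    for string in temp:
        if len(string) > 200:
            longs.append(string)"""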

def findKeyWords(text):
    """Count the word tokens in ``text`` that are listed in ``js_keywords``.

    Tokens are the ``\\w+`` runs of ``text``; every occurrence is counted.

    The earlier version also scanned quoted strings and tried to subtract
    keywords found there. ``re.findall`` returns tuples for that two-group
    pattern, and a tuple never equals a keyword string, so that pass never
    changed the count. It is omitted here and results are identical.
    """
    keywords = set(js_keywords)  # constant-time membership checks
    return sum(1 for word in re.findall(r'\w+', text) if word in keywords)
def findHexNumbers(text):
    """Count hexadecimal indicators in ``text``.

    The result is the number of ``0x`` occurrences plus the number of
    ``parseInt(...)`` calls whose argument list ends in ``16)``.
    """
    hex_prefixes = len(re.findall(r'0x', text))
    radix16_calls = len(re.findall(r'parseInt\([^,]*,? *16\)', text))
    return hex_prefixes + radix16_calls
# Example: print(parseJavascript("https://ajax.googleapis.com/ajax/libs/jquery/3.2.1/jquery.min.js", True))