-
Notifications
You must be signed in to change notification settings - Fork 6
Open
Description
import json
import operator
from collections import defaultdict
from detect import get_top_bigrams, parse_sample
def check_common_bigrams(bigrams, bigram_data):
    """Count how many of ``bigrams`` occur in each language's bigrams.

    Args:
        bigrams: Iterable of bigrams taken from the sample. A bigram that
            appears several times is counted once per occurrence.
        bigram_data: Mapping of language -> container of that language's
            bigrams. Only membership (``in``) is used on the container.

    Returns:
        dict mapping every language in ``bigram_data`` to the number of
        entries of ``bigrams`` found in that language's container.
    """
    # Materialize once: a one-shot iterator would otherwise be exhausted
    # after the first language, and every later language would score 0.
    bigrams = list(bigrams)
    return {
        lang: sum(1 for bigram in bigrams if bigram in lang_bigram)
        for lang, lang_bigram in bigram_data.items()
    }
def main(sample):
    """Print the best-matching language for ``sample``.

    The sample is passed through ``parse_sample`` and ``get_top_bigrams``
    (both imported from ``detect``), and the resulting bigrams are scored
    against the per-language data loaded from ``output.json`` using
    ``check_common_bigrams``.

    Args:
        sample: Text to classify. It is handed to ``parse_sample`` as
            ``text=`` and echoed back via ``repr`` in the output.

    Prints:
        The ``(language, match_count)`` pair with the highest count,
        followed by ``repr(sample)``. If ``output.json`` contains no
        languages, ``None`` is printed in place of the pair.
    """
    counts = defaultdict(int)
    # NOTE(review): parse_sample presumably fills `counts` in place -- its
    # return value is not used here; confirm against detect.py.
    parse_sample(counts, text=sample)
    bigrams = get_top_bigrams(counts)

    # Close the file deterministically and decode as UTF-8 regardless of
    # the platform's locale encoding.
    with open('output.json', encoding='utf-8') as model_file:
        trained = json.load(model_file)

    results = check_common_bigrams(bigrams, trained)
    # max() returns the first maximal item, which is the same winner that
    # a stable descending sort followed by [0] would pick. default=None
    # avoids an exception when there are no languages to compare against.
    best = max(results.items(), key=operator.itemgetter(1), default=None)
    print(best, repr(sample))
if __name__ == "__main__":
    # test_100.json is consumed line by line: each non-blank line is parsed
    # as its own JSON document, and its 'text' field is classified.
    # The file is opened as UTF-8 explicitly so decoding does not depend on
    # the platform locale, and the handle is closed when the loop ends.
    with open("test_100.json", encoding="utf-8") as samples:
        for line in samples:
            if not line.strip():
                # Skip blank lines instead of letting json.loads fail.
                continue
            main(json.loads(line)['text'])
Metadata
Metadata
Assignees
Labels
No labels