# listing metadata: 132 lines, 3.5 KiB, Python
# -*- coding: utf-8 -*-
"""
SEC Filing Scraper
@author: AdamGetbags
"""

# import modules
import requests
import pandas as pd

# create request header
# NOTE(review): the User-Agent value looks like a placeholder address;
# presumably it should be replaced with a real contact before running --
# confirm against the SEC's access guidance.
headers = {'User-Agent': "email@address.com"}

# get all companies data
# the timeout keeps the script from hanging forever on a stalled connection
companyTickers = requests.get(
    "https://www.sec.gov/files/company_tickers.json",
    headers=headers,
    timeout=30
)
# stop here with the HTTP status rather than later with a JSON decode error
companyTickers.raise_for_status()

# decode the response body once; every .json() call parses it again
tickersJson = companyTickers.json()

# review response / keys
print(tickersJson.keys())

# format response to dictionary and get first key/value
firstEntry = tickersJson['0']

# parse CIK // without leading zeros
# same value as tickersJson['0']['cik_str'], read from the entry above
directCik = firstEntry['cik_str']

# dictionary to dataframe // each top-level key of the response becomes a row
companyData = pd.DataFrame.from_dict(companyTickers.json(), orient='index')

# add leading zeros to CIK // the padded value is interpolated into the
# data.sec.gov URLs further down
companyData['cik_str'] = companyData['cik_str'].astype(str).str.zfill(10)

# review data
print(companyData[:1])

# padded CIK of the first row
# .iloc selects by position: the frame's index holds the string keys of the
# JSON object ('0', ...), so a bare [0] would depend on pandas' deprecated
# integer-key-as-position fallback
cik = companyData['cik_str'].iloc[0]

# get company specific filing metadata
# the timeout keeps the script from hanging forever on a stalled connection
filingMetadata = requests.get(
    f'https://data.sec.gov/submissions/CIK{cik}.json',
    headers=headers,
    timeout=30
)
# stop here with the HTTP status rather than later with a JSON decode error
filingMetadata.raise_for_status()

# review json
# decode the body once; every .json() call parses it again
filingJson = filingMetadata.json()
print(filingJson.keys())

# the bare expressions below are for interactive inspection only; they are
# evaluated and discarded when the file runs as a script
filingJson['filings']
filingJson['filings'].keys()
filingJson['filings']['recent']
filingJson['filings']['recent'].keys()

# dictionary to dataframe // the keys of the 'recent' mapping become columns
allForms = pd.DataFrame.from_dict(
    filingMetadata.json()['filings']['recent']
)

# review columns (interactive inspection; results are discarded in a script)
allForms.columns
allForms[['accessionNumber', 'reportDate', 'form']].head(50)

# 10-Q metadata
# pick the first row whose form is '10-Q' instead of a fixed row number,
# which only lines up with a 10-Q for one particular company and snapshot
# NOTE(review): presumably the first match is the most recent filing --
# confirm the ordering of the 'recent' arrays
allForms[allForms['form'] == '10-Q'].head(1)

# get company facts data
# the timeout keeps the script from hanging forever on a stalled connection
companyFacts = requests.get(
    f'https://data.sec.gov/api/xbrl/companyfacts/CIK{cik}.json',
    headers=headers,
    timeout=30
)
# stop here with the HTTP status rather than later with a JSON decode error
companyFacts.raise_for_status()

# review data
# decode the body once; every .json() call parses the whole payload again
# the bare expressions in this section are for interactive inspection only;
# they are evaluated and discarded when the file runs as a script
factsJson = companyFacts.json()
factsJson.keys()
factsJson['facts']
factsJson['facts'].keys()

# filing metadata
sharesOutstanding = factsJson['facts']['dei'][
    'EntityCommonStockSharesOutstanding']
sharesOutstanding
sharesOutstanding.keys()
sharesOutstanding['units']
sharesOutstanding['units']['shares']
sharesOutstanding['units']['shares'][0]

# concept data // financial statement line items
usGaap = factsJson['facts']['us-gaap']
usGaap
usGaap.keys()

# different amounts of data available per concept
usGaap['AccountsPayable']
usGaap['Revenues']
usGaap['Assets']

# get company concept data
# only the first URL fragment interpolates a value, so only it is an f-string
# the timeout keeps the script from hanging forever on a stalled connection
companyConcept = requests.get(
    (
        f'https://data.sec.gov/api/xbrl/companyconcept/CIK{cik}'
        '/us-gaap/Assets.json'
    ),
    headers=headers,
    timeout=30
)
# stop here with the HTTP status rather than later with a JSON decode error
companyConcept.raise_for_status()

# review data
# decode the body once; every .json() call parses it again
# the bare expressions in this section are for interactive inspection only;
# they are evaluated and discarded when the file runs as a script
conceptJson = companyConcept.json()
conceptJson.keys()
conceptJson['units']
conceptJson['units'].keys()
usdFacts = conceptJson['units']['USD']
usdFacts
usdFacts[0]

# parse assets from single filing
usdFacts[0]['val']

# get all filings data
# the 'USD' entry is indexed like a list of per-filing dictionaries elsewhere
# in this script, so the plain constructor builds one row per entry
assetsData = pd.DataFrame(companyConcept.json()['units']['USD'])

# review data (interactive inspection; results are discarded in a script)
assetsData.columns
assetsData['form']

# get assets from 10Q forms and reset index
# drop=True discards the old row labels instead of keeping them as a column
assets10Q = assetsData[assetsData['form'] == '10-Q'].reset_index(drop=True)

print(assets10Q)