| name | cobol-parsing |
| description | Parse and analyze COBOL source code structure, data divisions, and procedure logic. Use when migrating COBOL applications, analyzing legacy mainframe code, or extracting business rules from COBOL programs. |
# COBOL Parsing
Extract and understand COBOL program structure and business logic.
## COBOL Program Structure
import re
def parse_cobol_divisions(cobol_source):
    """Split COBOL source text into its four divisions.

    Division headers are recognised case-insensitively, with any amount of
    whitespace between the keyword and ``DIVISION``, and the ``ID DIVISION``
    abbreviation is accepted for the identification division. Comment lines
    are never treated as headers, so a remark that merely mentions a
    division does not switch the current division.

    Args:
        cobol_source: Full program text as a single string.

    Returns:
        A dict with the keys ``'IDENTIFICATION'``, ``'ENVIRONMENT'``,
        ``'DATA'`` and ``'PROCEDURE'``. Each value holds the lines that
        follow that division's header (the header line itself is excluded),
        each terminated by ``'\\n'``. Divisions that are absent map to ``''``.
        Lines that precede the first header are discarded.
    """
    header = re.compile(
        r'(?<![A-Z0-9-])(IDENTIFICATION|ID|ENVIRONMENT|DATA|PROCEDURE)'
        r'\s+DIVISION(?![A-Z0-9-])',
        re.IGNORECASE,
    )
    collected = {
        'IDENTIFICATION': [],
        'ENVIRONMENT': [],
        'DATA': [],
        'PROCEDURE': [],
    }
    current_division = None

    for line in cobol_source.split('\n'):
        # A comment is either a '*' / '/' in the indicator column (column 7)
        # of fixed-format source, or a line whose first visible character is
        # '*' (covers free-format '*>' comments as well).
        is_comment = line.lstrip().startswith('*') or (
            len(line) > 6 and line[6] in '*/'
        )
        match = None if is_comment else header.search(line)
        if match:
            keyword = match.group(1).upper()
            current_division = 'IDENTIFICATION' if keyword == 'ID' else keyword
        elif current_division:
            collected[current_division].append(line + '\n')

    # Join once at the end instead of growing strings inside the loop.
    return {name: ''.join(lines) for name, lines in collected.items()}
## Data Division Parsing
def parse_data_items(data_division):
    """Collect the elementary data items that carry a PICTURE clause.

    Each line is scanned for ``<level> <name> PIC <picture>``. The keyword
    may be written ``PIC`` or ``PICTURE``, optionally followed by ``IS``,
    in any letter case. Only the sentence-terminating period is removed
    from the picture, so edited pictures such as ``ZZ,ZZ9.99`` keep their
    decimal point.

    Args:
        data_division: Text of the DATA DIVISION.

    Returns:
        A list of dicts, in source order, with the keys ``'level'`` (int),
        ``'name'``, ``'picture'`` (as written in the source) and ``'type'``
        (the result of ``infer_type``). Lines without a PICTURE clause
        directly after the data name are ignored.
    """
    item_re = re.compile(
        r'(\d+)\s+(\S+)\s+PIC(?:TURE)?(?:\s+IS)?\s+(\S+)',
        re.IGNORECASE,
    )
    items = []
    for line in data_division.split('\n'):
        match = item_re.search(line)
        if not match:
            continue
        level, name, raw_picture = match.groups()
        # A trailing '.' ends the data description entry; it is not part of
        # the picture string. Periods inside the picture are kept.
        picture = raw_picture.rstrip('.')
        if not picture:
            continue
        items.append({
            'level': int(level),
            'name': name,
            'picture': picture,
            # Picture symbols are case-insensitive; classify the upper-case form.
            'type': infer_type(picture.upper()),
        })
    return items
def infer_type(picture):
    """Classify a PICTURE character-string into a coarse data type.

    The check is case-insensitive and ignores a trailing sentence period,
    so ``'9(5).'`` is not mistaken for a decimal picture.

    Args:
        picture: The picture string, e.g. ``'X(10)'`` or ``'S9(5)V99'``.

    Returns:
        ``'string'`` when the picture contains ``X`` (alphanumeric) or ``A``
        (alphabetic) positions; ``'decimal'`` when it contains an implied
        (``V``) or explicit (``.``) decimal point; ``'integer'`` when it
        contains digit positions (``9`` or zero-suppressed ``Z``); otherwise
        ``'unknown'``, including for empty or ``None`` input.
    """
    if not picture:
        return 'unknown'
    symbols = picture.upper().rstrip('.')
    if 'X' in symbols or 'A' in symbols:
        return 'string'
    if 'V' in symbols or '.' in symbols:
        return 'decimal'
    if '9' in symbols or 'Z' in symbols:
        return 'integer'
    return 'unknown'
## PERFORM Statement Analysis
def extract_perform_statements(procedure_division):
    """List the procedure names invoked by out-of-line PERFORM statements.

    Inline PERFORMs (``PERFORM UNTIL ...``, ``PERFORM VARYING ...``,
    ``PERFORM WITH TEST ...``, ``PERFORM <count> TIMES``) have no target and
    are skipped, and ``END-PERFORM`` is never mistaken for a PERFORM.
    Matching is case-insensitive.

    Args:
        procedure_division: Text of the PROCEDURE DIVISION.

    Returns:
        The distinct target names, upper-cased, in order of first
        appearance. For ``PERFORM A THRU B`` only ``A`` is reported.
    """
    inline_keywords = {'UNTIL', 'VARYING', 'WITH', 'TEST', 'FOREVER'}
    perform_re = re.compile(
        # The lookbehind rejects END-PERFORM and other hyphenated words that
        # merely end in PERFORM.
        r'(?<![A-Z0-9-])PERFORM\s+([A-Z0-9-]+)(\s+TIMES(?![A-Z0-9-]))?',
        re.IGNORECASE,
    )
    targets = {}
    for match in perform_re.finditer(procedure_division):
        name = match.group(1).upper()
        # "<operand> TIMES" directly after PERFORM is an inline loop count.
        if match.group(2) or name in inline_keywords:
            continue
        # A dict keeps first-seen order while de-duplicating.
        targets.setdefault(name, None)
    return list(targets)
def find_paragraph(procedure_division, paragraph_name):
    """Return the body of a named paragraph in the PROCEDURE DIVISION.

    The paragraph header must start its line (optionally after a six-digit
    sequence number) and is matched case-insensitively. The body runs up to
    the next paragraph or section header, or to the end of the text, so
    sentence periods inside the body do not cut it short.

    A following header is recognised heuristically: a line holding only a
    single word (optionally followed by ``SECTION``) and a period, where the
    word is not a statement that can stand alone such as ``GOBACK`` or
    ``END-IF``.

    Args:
        procedure_division: Text of the PROCEDURE DIVISION.
        paragraph_name: Name of the paragraph to look up.

    Returns:
        The paragraph body with surrounding whitespace stripped, or ``None``
        when the paragraph is not found or the name is empty.
    """
    if not paragraph_name:
        return None

    # One-word sentences that look like "NAME." but are statements.
    standalone_statements = frozenset({
        'GOBACK', 'EXIT', 'CONTINUE',
        'END-ACCEPT', 'END-ADD', 'END-CALL', 'END-COMPUTE', 'END-DELETE',
        'END-DISPLAY', 'END-DIVIDE', 'END-EVALUATE', 'END-EXEC', 'END-IF',
        'END-MULTIPLY', 'END-PERFORM', 'END-READ', 'END-RETURN',
        'END-REWRITE', 'END-SEARCH', 'END-START', 'END-STRING',
        'END-SUBTRACT', 'END-UNSTRING', 'END-WRITE',
    })
    line_prefix = r'^[ \t]*(?:\d{6}[ \t]+)?'
    flags = re.IGNORECASE | re.MULTILINE

    # Escape the name so it is matched literally, and anchor it to the line
    # start so "MAIN-PARA" does not match inside "SUB-MAIN-PARA".
    start = re.search(
        line_prefix + re.escape(paragraph_name) + r'[ \t]*\.',
        procedure_division,
        flags,
    )
    if start is None:
        return None

    header_re = re.compile(
        line_prefix
        + r'([A-Z0-9][A-Z0-9-]*)(?:[ \t]+SECTION)?[ \t]*\.[ \t]*\r?$',
        flags,
    )
    body_end = len(procedure_division)
    for candidate in header_re.finditer(procedure_division, start.end()):
        if candidate.group(1).upper() not in standalone_statements:
            body_end = candidate.start()
            break
    return procedure_division[start.end():body_end].strip()
## COMPUTE Statement Extraction
def extract_computations(procedure_division):
    """Extract the target and formula of each COMPUTE statement.

    A formula ends at the sentence period (a ``.`` followed by whitespace
    or end of line), at ``END-COMPUTE``, or at the end of the line. A
    period inside a numeric literal such as ``0.05`` does not end it. The
    optional ``ROUNDED`` phrase after the target is accepted and not
    reported. Matching is case-insensitive.

    Args:
        procedure_division: Text of the PROCEDURE DIVISION.

    Returns:
        A list of dicts in source order, each with ``'variable'`` (the
        receiving data name) and ``'formula'`` (the right-hand side with
        surrounding whitespace stripped).
    """
    compute_re = re.compile(
        r'(?<![A-Z0-9-])COMPUTE\s+([A-Z0-9-]+)(?:\s+ROUNDED)?\s*=\s*'
        # Lazy formula, then a real terminator: the lookahead after the
        # period is what keeps decimal literals intact.
        r'(.+?)\s*(?:\.(?=\s|$)|END-COMPUTE|$)',
        re.MULTILINE | re.IGNORECASE,
    )
    return [
        {'variable': match.group(1), 'formula': match.group(2).strip()}
        for match in compute_re.finditer(procedure_division)
    ]
## File Operations
def parse_file_control(environment_division):
    """Extract SELECT ... ASSIGN entries from the FILE-CONTROL paragraph.

    Handles ``SELECT OPTIONAL``, an omitted ``TO`` after ``ASSIGN``, and
    quoted literals that contain periods (``'cust.dat'``). Matching is
    case-insensitive.

    Args:
        environment_division: Text of the ENVIRONMENT DIVISION.

    Returns:
        A list of dicts in source order with ``'internal_name'`` (the file
        name used by the program) and ``'external_name'`` (the assignment
        target exactly as written; a literal keeps its quotes).
    """
    select_re = re.compile(
        r'(?<![A-Z0-9-])SELECT\s+(?:OPTIONAL\s+)?([A-Z0-9-]+)'
        r'\s+ASSIGN\s+(?:TO\s+)?'
        # Try a quoted literal first so a dot inside it is not mistaken for
        # the sentence period; otherwise take the bare word up to '.'.
        r'''("[^"\n]*"|'[^'\n]*'|[^\s.]+)''',
        re.IGNORECASE,
    )
    return [
        {'internal_name': match.group(1), 'external_name': match.group(2)}
        for match in select_re.finditer(environment_division)
    ]
## Complete Parser Example
class COBOLParser:
    """Convenience wrapper that runs the module's parsers over one program.

    The source is split into divisions once at construction time; each
    accessor then parses the relevant division on demand.

    Attributes:
        source: The raw program text passed to the constructor.
        divisions: The dict returned by ``parse_cobol_divisions(source)``.
    """

    def __init__(self, source):
        """Store the source text and split it into divisions."""
        self.source = source
        self.divisions = parse_cobol_divisions(source)

    def get_data_items(self):
        """Return the data items parsed from the DATA division."""
        return parse_data_items(self.divisions['DATA'])

    def get_computations(self):
        """Return the COMPUTE statements found in the PROCEDURE division."""
        return extract_computations(self.divisions['PROCEDURE'])

    def get_program_flow(self):
        """Return the PERFORM targets found in the PROCEDURE division."""
        return extract_perform_statements(self.divisions['PROCEDURE'])

    def get_file_control(self):
        """Return the SELECT/ASSIGN entries from the ENVIRONMENT division."""
        return parse_file_control(self.divisions['ENVIRONMENT'])

    def get_paragraph(self, paragraph_name):
        """Return the body of a PROCEDURE division paragraph, or ``None``."""
        return find_paragraph(self.divisions['PROCEDURE'], paragraph_name)
# Usage: read a COBOL program from disk, then run the parsers over its text.
with open('legacy.cob', 'r') as source_file:
    cobol_text = source_file.read()

parser = COBOLParser(cobol_text)
data_items = parser.get_data_items()
computations = parser.get_computations()